feat: Enhanced auction scrapers with multiple sources
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
- Add GoDaddy RSS feed scraper (bypasses Cloudflare)
- Enhance ExpiredDomains scraper (multiple pages and TLDs)
- Improve hidden API scraper integration
- Add automated scraper cron script (runs every 30 minutes)
- Install Playwright stealth mode on the server

Sources now working:
- Dynadot REST API: ~100 auctions
- GoDaddy RSS: ~100 auctions
- ExpiredDomains: ~250 auctions

Total: 467 auctions in the database
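The key idea behind the RSS scraper is that GoDaddy publishes its auction feeds as plain XML that is not fronted by Cloudflare, so an ordinary HTTP client can read them. Below is a minimal standalone sketch of that approach, mirroring the logic this commit adds in GoDaddyRssScraper; the feed URL is taken from the diff, while the function name and field handling here are illustrative only.

import httpx
import xml.etree.ElementTree as ET

# Public "ending soon" feed used by the new scraper (URL taken from the diff below).
FEED_URL = "https://auctions.godaddy.com/rss/ending.aspx"


def fetch_ending_auctions(limit: int = 10) -> list:
    """Fetch the feed and pull out domain/price pairs; titles look like 'example.com - $12'."""
    response = httpx.get(FEED_URL, timeout=30.0)
    response.raise_for_status()
    root = ET.fromstring(response.text)

    auctions = []
    for item in root.findall(".//item")[:limit]:
        title = item.findtext("title", default="")
        parts = title.split(" - ")
        domain = parts[0].strip().lower()
        price = parts[1].strip() if len(parts) > 1 else None
        if domain and "." in domain:
            auctions.append({"domain": domain, "price": price})
    return auctions


if __name__ == "__main__":
    for auction in fetch_ending_auctions():
        print(auction)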
@@ -302,6 +302,11 @@ class AuctionScraperService:
         """
         Scrape ExpiredDomains.net for auction listings.
 
         This site aggregates expired/deleted domains from various TLDs.
+
+        Enhanced to scrape multiple pages and categories:
+        - Deleted domains (multiple TLDs)
+        - Pending delete domains
+        - Expired auction domains
         """
         platform = "ExpiredDomains"
         result = {"found": 0, "new": 0, "updated": 0}
@@ -314,20 +319,46 @@ class AuctionScraperService:
             await self._rate_limit(platform)
             client = await self._get_client()
 
-            # Scrape deleted domains page
-            url = "https://www.expireddomains.net/deleted-domains/"
-            response = await client.get(url)
-
-            if response.status_code != 200:
-                raise Exception(f"HTTP {response.status_code}")
-
-            soup = BeautifulSoup(response.text, "lxml")
-            domain_rows = soup.select("table.base1 tbody tr")
-
-            # TLD-based pricing
-            base_prices = {"com": 12, "net": 10, "org": 10, "io": 50, "ai": 80, "co": 25, "de": 8, "nl": 10, "fr": 10, "app": 15}
-
-            for row in domain_rows[:30]:
-                try:
-                    cols = row.find_all("td")
-                    if len(cols) < 3:
+            # TLD-based pricing
+            base_prices = {
+                "com": 12, "net": 10, "org": 10, "io": 50, "ai": 80,
+                "co": 25, "de": 8, "nl": 10, "fr": 10, "app": 15,
+                "xyz": 5, "info": 8, "tech": 15, "dev": 12, "me": 15,
+                "tv": 35, "gg": 60, "sh": 40, "cc": 25, "biz": 8,
+            }
+
+            # Enhanced: Multiple pages to scrape
+            pages_to_scrape = [
+                # Deleted domains (different sorting/pages)
+                "https://www.expireddomains.net/deleted-domains/",
+                "https://www.expireddomains.net/deleted-domains/?start=25",
+                "https://www.expireddomains.net/deleted-domains/?start=50",
+                # Pending delete
+                "https://www.expireddomains.net/pending-delete-domains/",
+                # By TLD
+                "https://www.expireddomains.net/deleted-com-domains/",
+                "https://www.expireddomains.net/deleted-net-domains/",
+                "https://www.expireddomains.net/deleted-io-domains/",
+                "https://www.expireddomains.net/deleted-ai-domains/",
+                # Backorder auctions
+                "https://www.expireddomains.net/backorder-domain-auctions/",
+            ]
+
+            seen_domains = set()
+
+            for url in pages_to_scrape:
+                try:
+                    await asyncio.sleep(1)  # Rate limit between pages
+                    response = await client.get(url, timeout=15.0)
+
+                    if response.status_code != 200:
+                        logger.debug(f"ExpiredDomains {url}: HTTP {response.status_code}")
+                        continue
+
+                    soup = BeautifulSoup(response.text, "lxml")
+                    domain_rows = soup.select("table.base1 tbody tr")
+
+                    for row in domain_rows[:50]:  # 50 per page
+                        try:
+                            cols = row.find_all("td")
+                            if len(cols) < 3:
@@ -342,9 +373,36 @@ class AuctionScraperService:
                                continue
 
                            domain = domain_text.lower()
+
+                            # Skip if already seen
+                            if domain in seen_domains:
+                                continue
+                            seen_domains.add(domain)
+
                            tld = domain.rsplit(".", 1)[-1]
                            estimated_price = base_prices.get(tld, 15)
 
+                            # Try to extract age/backlinks from other columns
+                            age_years = None
+                            backlinks = None
+                            domain_authority = None
+
+                            if len(cols) >= 5:
+                                try:
+                                    # BL column (backlinks)
+                                    bl_text = cols[3].get_text(strip=True)
+                                    if bl_text and bl_text.isdigit():
+                                        backlinks = int(bl_text)
+                                except:
+                                    pass
+                                try:
+                                    # ABY column (archive.org age)
+                                    age_text = cols[4].get_text(strip=True)
+                                    if age_text and age_text.isdigit():
+                                        age_years = int(age_text)
+                                except:
+                                    pass
+
                            auction_data = {
                                "domain": domain,
                                "tld": tld,
@@ -362,9 +420,9 @@ class AuctionScraperService:
                                "end_time": datetime.utcnow() + timedelta(days=7),
                                "auction_type": "registration",
                                "traffic": None,
-                                "age_years": None,
-                                "backlinks": None,
-                                "domain_authority": None,
+                                "age_years": age_years,
+                                "backlinks": backlinks,
+                                "domain_authority": domain_authority,
                                "scrape_source": "expireddomains.net",
                            }
 
@@ -376,6 +434,10 @@ class AuctionScraperService:
                            logger.debug(f"Error parsing row: {e}")
                            continue
 
+                except Exception as e:
+                    logger.debug(f"Error fetching {url}: {e}")
+                    continue
+
            await db.commit()
            log.completed_at = datetime.utcnow()
            log.status = "success"
@@ -384,6 +446,8 @@ class AuctionScraperService:
            log.auctions_updated = result["updated"]
            await db.commit()
 
+            logger.info(f"✅ ExpiredDomains: {result['found']} domains found")
+
        except Exception as e:
            log.completed_at = datetime.utcnow()
            log.status = "failed"
@@ -582,6 +582,157 @@ class GoDaddyApiScraper:
            return {"items": [], "total": 0, "error": str(e)}
 
 
+# ═══════════════════════════════════════════════════════════════════════════════
+# GODADDY RSS SCRAPER — Public RSS Feed (NO Cloudflare!)
+# ═══════════════════════════════════════════════════════════════════════════════
+
+class GoDaddyRssScraper:
+    """
+    Scraper for GoDaddy Auctions using their PUBLIC RSS feeds.
+
+    These RSS feeds are NOT protected by Cloudflare and always work!
+
+    Feeds:
+    - https://auctions.godaddy.com/rss/ending.aspx (Ending Soon)
+    - https://auctions.godaddy.com/rss/new.aspx (New Auctions)
+    - https://auctions.godaddy.com/rss/closeouts.aspx (Closeouts)
+    """
+
+    RSS_FEEDS = {
+        "ending": "https://auctions.godaddy.com/rss/ending.aspx",
+        "new": "https://auctions.godaddy.com/rss/new.aspx",
+        "closeouts": "https://auctions.godaddy.com/rss/closeouts.aspx",
+    }
+
+    async def fetch_auctions(
+        self,
+        feed_type: str = "ending",  # "ending", "new", or "closeouts"
+        limit: int = 100,
+    ) -> Dict[str, Any]:
+        """Fetch auctions from GoDaddy RSS feeds."""
+        try:
+            import xml.etree.ElementTree as ET
+
+            feed_url = self.RSS_FEEDS.get(feed_type, self.RSS_FEEDS["ending"])
+
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                response = await client.get(
+                    feed_url,
+                    headers={
+                        "Accept": "application/rss+xml, application/xml, text/xml",
+                        "User-Agent": "Mozilla/5.0 (compatible; PounceBot/1.0; +https://pounce.ch)",
+                    },
+                )
+
+                if response.status_code != 200:
+                    logger.error(f"GoDaddy RSS error: {response.status_code}")
+                    return {"items": [], "total": 0, "error": f"HTTP {response.status_code}"}
+
+                # Parse RSS XML
+                root = ET.fromstring(response.text)
+
+                # Find all items in the RSS feed
+                items = root.findall(".//item")
+
+                transformed = []
+                for item in items[:limit]:
+                    try:
+                        title = item.find("title").text if item.find("title") is not None else ""
+                        link = item.find("link").text if item.find("link") is not None else ""
+                        description = item.find("description").text if item.find("description") is not None else ""
+
+                        # Extract domain from title (format: "domain.com - $XX")
+                        domain = ""
+                        price = 0
+
+                        if title:
+                            # Title format: "example.com - $12" or "example.com"
+                            parts = title.split(" - ")
+                            domain = parts[0].strip().lower()
+
+                            if len(parts) > 1:
+                                price_str = parts[1].replace("$", "").replace(",", "").strip()
+                                try:
+                                    price = float(price_str)
+                                except:
+                                    pass
+
+                        # Try to extract price from description if not in title
+                        if price == 0 and description:
+                            import re
+                            price_match = re.search(r'\$([0-9,]+(?:\.[0-9]+)?)', description)
+                            if price_match:
+                                price = float(price_match.group(1).replace(",", ""))
+
+                        if not domain or "." not in domain:
+                            continue
+
+                        tld = domain.rsplit(".", 1)[-1]
+
+                        # Add affiliate param to link
+                        affiliate_url = link
+                        if link and "?" in link:
+                            affiliate_url = f"{link}&isc=cjcpounce"
+                        elif link:
+                            affiliate_url = f"{link}?isc=cjcpounce"
+                        else:
+                            affiliate_url = build_affiliate_url("GoDaddy", domain)
+
+                        transformed.append({
+                            "domain": domain,
+                            "tld": tld,
+                            "platform": "GoDaddy",
+                            "current_bid": price,
+                            "min_bid": price,
+                            "num_bids": 0,  # RSS doesn't provide bid count
+                            "end_time": datetime.utcnow() + timedelta(hours=24),  # Estimate
+                            "buy_now_price": None,
+                            "auction_url": affiliate_url,
+                            "currency": "USD",
+                            "is_active": True,
+                            "source": f"RSS-{feed_type}",
+                        })
+                    except Exception as e:
+                        logger.warning(f"Error parsing GoDaddy RSS item: {e}")
+                        continue
+
+                logger.info(f"GoDaddy RSS ({feed_type}): Found {len(transformed)} auctions")
+                return {
+                    "items": transformed,
+                    "total": len(transformed),
+                    "has_more": False,
+                }
+
+        except Exception as e:
+            logger.exception(f"GoDaddy RSS scraper error: {e}")
+            return {"items": [], "total": 0, "error": str(e)}
+
+    async def fetch_all_feeds(self) -> Dict[str, Any]:
+        """Fetch from all GoDaddy RSS feeds."""
+        all_items = []
+        errors = []
+
+        for feed_type in ["ending", "new", "closeouts"]:
+            result = await self.fetch_auctions(feed_type=feed_type, limit=50)
+            all_items.extend(result.get("items", []))
+            if result.get("error"):
+                errors.append(f"{feed_type}: {result['error']}")
+
+        # Dedupe by domain
+        seen = set()
+        unique_items = []
+        for item in all_items:
+            if item["domain"] not in seen:
+                seen.add(item["domain"])
+                unique_items.append(item)
+
+        return {
+            "items": unique_items,
+            "total": len(unique_items),
+            "errors": errors if errors else None,
+        }
+
+
 # ═══════════════════════════════════════════════════════════════════════════════
 # PARK.IO SCRAPER — Backorder Service API
 # ═══════════════════════════════════════════════════════════════════════════════
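For reference, a minimal usage sketch of the GoDaddyRssScraper class added in the hunk above, assuming it is imported from wherever this service module lives; the file path is not shown in this diff, so the import below is hypothetical.

import asyncio

# Hypothetical import path; the diff does not name the module file.
from app.services.auction_scraper import GoDaddyRssScraper


async def main() -> None:
    scraper = GoDaddyRssScraper()
    # fetch_all_feeds() merges the "ending", "new", and "closeouts" feeds
    # and dedupes results by domain, as implemented above.
    result = await scraper.fetch_all_feeds()
    print(f"{result['total']} unique GoDaddy auctions via RSS")
    for item in result["items"][:5]:
        print(item["domain"], item["current_bid"], item["auction_url"])


if __name__ == "__main__":
    asyncio.run(main())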
@@ -857,6 +1008,7 @@ class HiddenApiScraperService:
        self.dynadot = DynadotApiScraper()
        self.sav = SavApiScraper()
        self.godaddy = GoDaddyApiScraper()
+        self.godaddy_rss = GoDaddyRssScraper()  # RSS fallback (NO Cloudflare!)
        self.parkio = ParkIoApiScraper()
        self.namejet = NameJetApiScraper()
 
@@ -873,25 +1025,46 @@ class HiddenApiScraperService:
            "items": [],
        }
 
+        # ═══════════════════════════════════════════════════════════
+        # TIER 0: RSS Feeds (Most Reliable - NO Cloudflare!)
+        # ═══════════════════════════════════════════════════════════
+
+        # Scrape GoDaddy RSS (Always works!)
+        try:
+            rss_data = await self.godaddy_rss.fetch_all_feeds()
+            rss_count = len(rss_data.get("items", []))
+            if rss_count > 0:
+                results["platforms"]["GoDaddy-RSS"] = {
+                    "found": rss_count,
+                    "total": rss_count,
+                }
+                results["items"].extend(rss_data.get("items", []))
+                results["total_found"] += rss_count
+                logger.info(f"✅ GoDaddy RSS: {rss_count} auctions")
+        except Exception as e:
+            results["errors"].append(f"GoDaddy-RSS: {str(e)}")
+
        # ═══════════════════════════════════════════════════════════
        # TIER 1: Most Reliable JSON APIs
        # ═══════════════════════════════════════════════════════════
 
-        # Scrape GoDaddy (NEW - Most reliable!)
+        # Scrape GoDaddy JSON API (may have Cloudflare issues)
        try:
            godaddy_data = await self.godaddy.fetch_auctions(limit=limit_per_platform)
-            results["platforms"]["GoDaddy"] = {
-                "found": len(godaddy_data.get("items", [])),
-                "total": godaddy_data.get("total", 0),
-            }
-            results["items"].extend(godaddy_data.get("items", []))
-            results["total_found"] += len(godaddy_data.get("items", []))
+            godaddy_count = len(godaddy_data.get("items", []))
+            if godaddy_count > 0:
+                results["platforms"]["GoDaddy-API"] = {
+                    "found": godaddy_count,
+                    "total": godaddy_data.get("total", 0),
+                }
+                results["items"].extend(godaddy_data.get("items", []))
+                results["total_found"] += godaddy_count
+
            if godaddy_data.get("error"):
-                results["errors"].append(f"GoDaddy: {godaddy_data['error']}")
+                results["errors"].append(f"GoDaddy-API: {godaddy_data['error'][:100]}")
+
        except Exception as e:
-            results["errors"].append(f"GoDaddy: {str(e)}")
+            results["errors"].append(f"GoDaddy-API: {str(e)[:100]}")
 
        # Scrape Dynadot
        try:
@@ -989,6 +1162,7 @@ namecheap_scraper = NamecheapApiScraper()
 dynadot_scraper = DynadotApiScraper()
 sav_scraper = SavApiScraper()
 godaddy_scraper = GoDaddyApiScraper()
+godaddy_rss_scraper = GoDaddyRssScraper()  # RSS fallback (always works!)
 parkio_scraper = ParkIoApiScraper()
 namejet_scraper = NameJetApiScraper()
 hidden_api_scraper = HiddenApiScraperService()
backend/scripts/scrape_auctions.py (new file, 131 lines)
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Automated Auction Scraper Script
+
+This script runs all auction scrapers and saves results to the database.
+Designed to be run via cron job every 30 minutes.
+
+Usage:
+    python scripts/scrape_auctions.py
+
+Cron example (every 30 minutes):
+    */30 * * * * cd /home/user/pounce/backend && ./venv/bin/python scripts/scrape_auctions.py >> /var/log/pounce/scraper.log 2>&1
+"""
+
+import sys
+import os
+import asyncio
+import logging
+from datetime import datetime
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from app.services.auction_scraper import auction_scraper
+from app.database import AsyncSessionLocal
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+
+async def run_scrapers():
+    """Run all auction scrapers."""
+    start_time = datetime.utcnow()
+    logger.info(f"🚀 Starting auction scrape at {start_time.isoformat()}")
+
+    try:
+        async with AsyncSessionLocal() as db:
+            result = await auction_scraper.scrape_all_platforms(db)
+
+            # Log results
+            total_found = result.get("total_found", 0)
+            total_new = result.get("total_new", 0)
+
+            logger.info(f"✅ Scrape complete!")
+            logger.info(f"   Total Found: {total_found}")
+            logger.info(f"   New Added: {total_new}")
+
+            # Log platform breakdown
+            platforms = result.get("platforms", {})
+            for platform, data in platforms.items():
+                if isinstance(data, dict) and data.get("found", 0) > 0:
+                    logger.info(f"   {platform}: {data.get('found', 0)} found, {data.get('new', 0)} new")
+
+            # Log errors (but don't fail)
+            errors = result.get("errors", [])
+            if errors:
+                logger.warning(f"⚠️ {len(errors)} errors occurred:")
+                for err in errors[:5]:
+                    logger.warning(f"   - {str(err)[:100]}")
+
+            elapsed = (datetime.utcnow() - start_time).total_seconds()
+            logger.info(f"⏱️ Completed in {elapsed:.1f} seconds")
+
+            return result
+
+    except Exception as e:
+        logger.exception(f"❌ Scrape failed: {e}")
+        return {"error": str(e)}
+
+
+async def cleanup_old_auctions():
+    """Remove expired/old auctions from database."""
+    try:
+        async with AsyncSessionLocal() as db:
+            from sqlalchemy import delete, and_
+            from datetime import timedelta
+            from app.models.auction import DomainAuction
+
+            cutoff = datetime.utcnow() - timedelta(days=7)
+
+            # Mark expired auctions as inactive
+            from sqlalchemy import update
+            stmt = update(DomainAuction).where(
+                and_(
+                    DomainAuction.end_time < datetime.utcnow(),
+                    DomainAuction.is_active == True
+                )
+            ).values(is_active=False)
+
+            result = await db.execute(stmt)
+            await db.commit()
+
+            if result.rowcount > 0:
+                logger.info(f"🧹 Marked {result.rowcount} expired auctions as inactive")
+
+    except Exception as e:
+        logger.warning(f"Cleanup error: {e}")
+
+
+def main():
+    """Main entry point."""
+    print("="*60)
+    print(f"🐾 POUNCE Auction Scraper")
+    print(f"   Started: {datetime.now().isoformat()}")
+    print("="*60)
+
+    # Run scrapers
+    result = asyncio.run(run_scrapers())
+
+    # Run cleanup
+    asyncio.run(cleanup_old_auctions())
+
+    print("="*60)
+    print(f"✅ Done!")
+    print("="*60)
+
+    # Exit with error code if no results
+    if result.get("error") or result.get("total_found", 0) == 0:
+        sys.exit(1)
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()