pounce/backend/scripts/scrape_auctions.py
Yves Gugger de5cfdc10a feat: Enhanced auction scrapers with multiple sources
- Add GoDaddy RSS Feed scraper (bypasses Cloudflare)
- Enhanced ExpiredDomains scraper (multiple pages, TLDs)
- Improved hidden API scrapers integration
- Add automated scraper cron script (runs every 30 min)
- Playwright stealth mode installed on server

Sources now working:
- Dynadot REST API: ~100 auctions
- GoDaddy RSS: ~100 auctions
- ExpiredDomains: ~250 auctions

Total: 467 auctions in database
2025-12-11 20:58:04 +01:00
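
The GoDaddy RSS scraper itself lives in app.services.auction_scraper and is not part of this script. Purely as an illustration of the RSS approach mentioned above (polling a feed instead of the Cloudflare-protected HTML pages), a minimal fetch-and-parse sketch could look like the following; the feed URL here is a placeholder, not GoDaddy's real endpoint:

import feedparser  # third-party parser: pip install feedparser

# Placeholder URL -- the production endpoint is configured in the scraper service.
FEED_URL = "https://example.com/domain-auctions.rss"

def fetch_rss_auctions(url: str = FEED_URL) -> list[dict]:
    """Parse an auction RSS feed into simple domain/link records."""
    feed = feedparser.parse(url)
    return [
        {"domain": entry.get("title", "").strip().lower(),
         "url": entry.get("link", "")}
        for entry in feed.entries
    ]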

132 lines · 3.9 KiB · Python

#!/usr/bin/env python3
"""
Automated Auction Scraper Script

This script runs all auction scrapers and saves the results to the database.
It is designed to be run via a cron job every 30 minutes.

Usage:
    python scripts/scrape_auctions.py

Cron example (every 30 minutes):
    */30 * * * * cd /home/user/pounce/backend && ./venv/bin/python scripts/scrape_auctions.py >> /var/log/pounce/scraper.log 2>&1
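
Note: the log directory used above (/var/log/pounce) must already exist and be
writable by the cron user, otherwise the shell redirection fails and no log
file is written.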
"""
import sys
import os
import asyncio
import logging
from datetime import datetime
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.services.auction_scraper import auction_scraper
from app.database import AsyncSessionLocal

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
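# With this format a record looks like, e.g.:
#   2025-12-11 20:58:04,123 - __main__ - INFO - ✅ Scrape complete!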
logger = logging.getLogger(__name__)


async def run_scrapers():
    """Run all auction scrapers."""
    start_time = datetime.utcnow()
    logger.info(f"🚀 Starting auction scrape at {start_time.isoformat()}")

    try:
        async with AsyncSessionLocal() as db:
            result = await auction_scraper.scrape_all_platforms(db)
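            # Assumed result shape, inferred from how it is read below (the actual
            # contract is whatever scrape_all_platforms returns):
            #   {"total_found": int, "total_new": int,
            #    "platforms": {"<platform>": {"found": int, "new": int}, ...},
            #    "errors": ["<message>", ...]}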
            # Log results
            total_found = result.get("total_found", 0)
            total_new = result.get("total_new", 0)
            logger.info("✅ Scrape complete!")
            logger.info(f"   Total Found: {total_found}")
            logger.info(f"   New Added:   {total_new}")

            # Log platform breakdown
            platforms = result.get("platforms", {})
            for platform, data in platforms.items():
                if isinstance(data, dict) and data.get("found", 0) > 0:
                    logger.info(f"   {platform}: {data.get('found', 0)} found, {data.get('new', 0)} new")

            # Log errors (but don't fail)
            errors = result.get("errors", [])
            if errors:
                logger.warning(f"⚠️ {len(errors)} errors occurred:")
                for err in errors[:5]:
                    logger.warning(f"   - {str(err)[:100]}")

            elapsed = (datetime.utcnow() - start_time).total_seconds()
            logger.info(f"⏱️ Completed in {elapsed:.1f} seconds")
            return result

    except Exception as e:
        logger.exception(f"❌ Scrape failed: {e}")
        return {"error": str(e)}


async def cleanup_old_auctions():
    """Mark expired auctions in the database as inactive."""
    try:
        async with AsyncSessionLocal() as db:
            from sqlalchemy import and_, update
            from app.models.auction import DomainAuction

            # Mark expired auctions as inactive
            stmt = update(DomainAuction).where(
                and_(
                    DomainAuction.end_time < datetime.utcnow(),
                    DomainAuction.is_active == True
                )
            ).values(is_active=False)
            result = await db.execute(stmt)
            await db.commit()

            if result.rowcount > 0:
                logger.info(f"🧹 Marked {result.rowcount} expired auctions as inactive")
    except Exception as e:
        logger.warning(f"Cleanup error: {e}")


def main():
    """Main entry point."""
    print("=" * 60)
    print("🐾 POUNCE Auction Scraper")
    print(f"   Started: {datetime.now().isoformat()}")
    print("=" * 60)

    # Run scrapers
    result = asyncio.run(run_scrapers())

    # Run cleanup
    asyncio.run(cleanup_old_auctions())

    print("=" * 60)
    print("✅ Done!")
    print("=" * 60)

    # Exit with error code if no results
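    # (an error or an empty result yields a non-zero status, so cron or a
    # monitoring wrapper can flag the run as failed)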
    if result.get("error") or result.get("total_found", 0) == 0:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()