pounce/backend/scripts/scrape_auctions.py
fix(scraping): real auctions only + cleanup
- Remove seed/demo auction endpoint + scripts (no mock data)
- Rebuild AuctionScraper: strict validation (no -- bids, requires end_time)
- Add robust sources:
  - ExpiredDomains provider auction pages (GoDaddy/Namecheap/Sedo)
  - Park.io auctions table
  - Sav load_domains_ajax table
- Simplify hidden API scrapers to Dynadot only
- Add unique index on (platform, domain) + safe upsert
- Update deployment/docs to reflect real scraping
2025-12-11 21:50:33 +01:00
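
The "safe upsert" mentioned above is presumably implemented in the scraper service (app/services/auction_scraper.py), not in this script. A minimal sketch of an upsert keyed on (platform, domain), using SQLAlchemy's dialect-specific insert and assuming the DomainAuction model referenced below (current_bid is an illustrative column name):

    # Hypothetical sketch only; the actual upsert in auction_scraper.py may differ.
    from sqlalchemy.dialects.sqlite import insert  # use sqlalchemy.dialects.postgresql on Postgres

    from app.models.auction import DomainAuction


    async def upsert_auction(db, row: dict) -> None:
        # Insert a scraped auction, or refresh the existing (platform, domain) row
        # instead of creating a duplicate (relies on ux_auctions_platform_domain).
        stmt = insert(DomainAuction).values(**row)
        stmt = stmt.on_conflict_do_update(
            index_elements=["platform", "domain"],
            set_={
                "current_bid": stmt.excluded.current_bid,  # assumed column name
                "end_time": stmt.excluded.end_time,
                "is_active": True,
            },
        )
        await db.execute(stmt)
        await db.commit()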


#!/usr/bin/env python3
"""
Automated Auction Scraper Script

This script runs all auction scrapers and saves results to the database.
Designed to be run via cron job every 30 minutes.

Usage:
    python scripts/scrape_auctions.py

Cron example (every 30 minutes):
    */30 * * * * cd /home/user/pounce/backend && ./venv/bin/python scripts/scrape_auctions.py >> /var/log/pounce/scraper.log 2>&1
"""
import sys
import os
import asyncio
import logging
from datetime import datetime
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.services.auction_scraper import auction_scraper
from app.database import AsyncSessionLocal

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


async def ensure_auction_uniqueness():
    """
    Ensure we have a unique index on (platform, domain) and clean duplicates once.

    This prevents duplicate rows when the scraper runs repeatedly (cron) and when
    the session uses autoflush=False.
    """
    from sqlalchemy import text
    from app.config import get_settings

    settings = get_settings()
    db_url = settings.database_url or ""

    async with AsyncSessionLocal() as db:
        # Best-effort de-duplication (SQLite only).
        if db_url.startswith("sqlite"):
            await db.execute(
                text(
                    """
                    DELETE FROM domain_auctions
                    WHERE id NOT IN (
                        SELECT MAX(id) FROM domain_auctions GROUP BY platform, domain
                    )
                    """
                )
            )
            await db.commit()

        # Create unique index (works for SQLite and Postgres).
        await db.execute(
            text(
                "CREATE UNIQUE INDEX IF NOT EXISTS ux_auctions_platform_domain ON domain_auctions(platform, domain)"
            )
        )
        await db.commit()


async def run_scrapers():
    """Run all auction scrapers."""
    start_time = datetime.utcnow()
    logger.info(f"🚀 Starting auction scrape at {start_time.isoformat()}")

    try:
        async with AsyncSessionLocal() as db:
            result = await auction_scraper.scrape_all_platforms(db)

        # Log results
        total_found = result.get("total_found", 0)
        total_new = result.get("total_new", 0)
        logger.info("✅ Scrape complete!")
        logger.info(f"   Total Found: {total_found}")
        logger.info(f"   New Added: {total_new}")

        # Log platform breakdown
        platforms = result.get("platforms", {})
        for platform, data in platforms.items():
            if isinstance(data, dict) and data.get("found", 0) > 0:
                logger.info(f"   {platform}: {data.get('found', 0)} found, {data.get('new', 0)} new")

        # Log errors (but don't fail)
        errors = result.get("errors", [])
        if errors:
            logger.warning(f"⚠️ {len(errors)} errors occurred:")
            for err in errors[:5]:
                logger.warning(f"   - {str(err)[:100]}")

        elapsed = (datetime.utcnow() - start_time).total_seconds()
        logger.info(f"⏱️ Completed in {elapsed:.1f} seconds")
        return result

    except Exception as e:
        logger.exception(f"❌ Scrape failed: {e}")
        return {"error": str(e)}


async def cleanup_old_auctions():
    """Remove expired/old auctions from database."""
    try:
        async with AsyncSessionLocal() as db:
            from sqlalchemy import delete, and_
            from datetime import timedelta
            from app.models.auction import DomainAuction

            cutoff = datetime.utcnow() - timedelta(days=7)

            # Mark expired auctions as inactive
            from sqlalchemy import update
            stmt = update(DomainAuction).where(
                and_(
                    DomainAuction.end_time < datetime.utcnow(),
                    DomainAuction.is_active == True
                )
            ).values(is_active=False)
            result = await db.execute(stmt)
            await db.commit()

            if result.rowcount > 0:
                logger.info(f"🧹 Marked {result.rowcount} expired auctions as inactive")
    except Exception as e:
        logger.warning(f"Cleanup error: {e}")


def main():
    """Main entry point."""
    print("="*60)
    print(f"🐾 POUNCE Auction Scraper")
    print(f"   Started: {datetime.now().isoformat()}")
    print("="*60)

    # Ensure DB uniqueness constraints
    asyncio.run(ensure_auction_uniqueness())

    # Run scrapers
    result = asyncio.run(run_scrapers())

    # Run cleanup
    asyncio.run(cleanup_old_auctions())

    print("="*60)
    print(f"✅ Done!")
    print("="*60)

    # Exit with error code if no results
    if result.get("error") or result.get("total_found", 0) == 0:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()
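
For reference, the shape of the result dict this script consumes from scrape_all_platforms() can be inferred from the .get() calls above; the platform names and numbers here are purely illustrative:

    example_result = {
        "total_found": 42,
        "total_new": 7,
        "platforms": {
            "dynadot": {"found": 30, "new": 5},
            "park_io": {"found": 12, "new": 2},
        },
        "errors": ["Sav: request timed out"],
    }

Note that main() exits with status 1 when total_found is 0 (or when an error is returned), so an empty scrape surfaces as a failed cron run in the logs.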