- Remove seed/demo auction endpoint + scripts (no mock data)
- Rebuild AuctionScraper: strict validation (no "--" bids, requires end_time; see the sketch below)
- Add robust sources:
  - ExpiredDomains provider auction pages (GoDaddy/Namecheap/Sedo)
  - Park.io auctions table
  - Sav load_domains_ajax table
- Simplify hidden API scrapers to Dynadot only
- Add unique index on (platform, domain) + safe upsert
- Update deployment/docs to reflect real scraping
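
For reference, a minimal sketch of the strict validation described above, assuming scraped rows are normalized into dicts first (the field names current_bid/end_time/domain are illustrative, not confirmed AuctionScraper internals):

    def is_valid_auction(row: dict) -> bool:
        """Reject placeholder bids ("--") and rows without a concrete end time."""
        bid = str(row.get("current_bid", "")).strip()
        if not bid or bid == "--":
            return False
        if not row.get("end_time"):
            return False
        return bool(row.get("domain"))
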
#!/usr/bin/env python3
"""
Automated Auction Scraper Script

This script runs all auction scrapers and saves results to the database.
Designed to be run via cron job every 30 minutes.

Usage:
    python scripts/scrape_auctions.py

Cron example (every 30 minutes):
    */30 * * * * cd /home/user/pounce/backend && ./venv/bin/python scripts/scrape_auctions.py >> /var/log/pounce/scraper.log 2>&1
"""

import asyncio
import logging
import sys
from datetime import datetime
from pathlib import Path

# Add parent directory to path so `app` imports resolve when run from cron.
sys.path.insert(0, str(Path(__file__).parent.parent))

from app.database import AsyncSessionLocal
from app.services.auction_scraper import auction_scraper

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


async def ensure_auction_uniqueness():
    """
    Ensure we have a unique index on (platform, domain) and clean duplicates once.

    This prevents duplicate rows when the scraper runs repeatedly (cron) and when
    the session uses autoflush=False.
    """
    from sqlalchemy import text
    from app.config import get_settings

    settings = get_settings()
    db_url = settings.database_url or ""

    async with AsyncSessionLocal() as db:
        # Best-effort de-duplication (SQLite only).
        if db_url.startswith("sqlite"):
            await db.execute(
                text(
                    """
                    DELETE FROM domain_auctions
                    WHERE id NOT IN (
                        SELECT MAX(id) FROM domain_auctions GROUP BY platform, domain
                    )
                    """
                )
            )
            await db.commit()

        # Create unique index (works for SQLite and Postgres).
        await db.execute(
            text(
                "CREATE UNIQUE INDEX IF NOT EXISTS ux_auctions_platform_domain ON domain_auctions(platform, domain)"
            )
        )
        await db.commit()
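

# For reference, a sketch of the "safe upsert" that the unique index above
# enables. The real write path lives in app.services.auction_scraper; the
# column names below (current_bid, end_time) are assumptions based on this
# script's usage, not confirmed fields of DomainAuction.
#
#     from sqlalchemy.dialects.sqlite import insert as sqlite_insert
#
#     stmt = sqlite_insert(DomainAuction).values(**auction_row)
#     stmt = stmt.on_conflict_do_update(
#         index_elements=["platform", "domain"],
#         set_={
#             "current_bid": stmt.excluded.current_bid,
#             "end_time": stmt.excluded.end_time,
#         },
#     )
#     await db.execute(stmt)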


async def run_scrapers():
    """Run all auction scrapers."""
    start_time = datetime.utcnow()
    logger.info(f"🚀 Starting auction scrape at {start_time.isoformat()}")

    try:
        async with AsyncSessionLocal() as db:
            result = await auction_scraper.scrape_all_platforms(db)
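
            # Expected result shape (inferred from the usage below; the
            # authoritative structure is whatever scrape_all_platforms returns):
            #   {
            #       "total_found": int,
            #       "total_new": int,
            #       "platforms": {"<platform>": {"found": int, "new": int}},
            #       "errors": [str, ...],
            #   }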

            # Log results
            total_found = result.get("total_found", 0)
            total_new = result.get("total_new", 0)

            logger.info("✅ Scrape complete!")
            logger.info(f"   Total Found: {total_found}")
            logger.info(f"   New Added: {total_new}")

            # Log platform breakdown
            platforms = result.get("platforms", {})
            for platform, data in platforms.items():
                if isinstance(data, dict) and data.get("found", 0) > 0:
                    logger.info(f"   {platform}: {data.get('found', 0)} found, {data.get('new', 0)} new")

            # Log errors (but don't fail)
            errors = result.get("errors", [])
            if errors:
                logger.warning(f"⚠️ {len(errors)} errors occurred:")
                for err in errors[:5]:
                    logger.warning(f"   - {str(err)[:100]}")

            elapsed = (datetime.utcnow() - start_time).total_seconds()
            logger.info(f"⏱️ Completed in {elapsed:.1f} seconds")

            return result

    except Exception as e:
        logger.exception(f"❌ Scrape failed: {e}")
        return {"error": str(e)}


async def cleanup_old_auctions():
    """Mark expired auctions inactive and purge auctions that ended long ago."""
    try:
        async with AsyncSessionLocal() as db:
            from datetime import timedelta
            from sqlalchemy import and_, delete, update
            from app.models.auction import DomainAuction

            now = datetime.utcnow()
            cutoff = now - timedelta(days=7)

            # Mark expired auctions as inactive.
            stmt = update(DomainAuction).where(
                and_(
                    DomainAuction.end_time < now,
                    DomainAuction.is_active.is_(True)
                )
            ).values(is_active=False)
            result = await db.execute(stmt)

            # Purge auctions that ended more than 7 days ago so the table
            # stays small.
            await db.execute(delete(DomainAuction).where(DomainAuction.end_time < cutoff))
            await db.commit()

            if result.rowcount > 0:
                logger.info(f"🧹 Marked {result.rowcount} expired auctions as inactive")

    except Exception as e:
        logger.warning(f"Cleanup error: {e}")


def main():
    """Main entry point."""
    print("=" * 60)
    print("🐾 POUNCE Auction Scraper")
    print(f"   Started: {datetime.now().isoformat()}")
    print("=" * 60)

    # Ensure DB uniqueness constraints before writing anything.
    asyncio.run(ensure_auction_uniqueness())

    # Run scrapers
    result = asyncio.run(run_scrapers())

    # Run cleanup
    asyncio.run(cleanup_old_auctions())

    print("=" * 60)
    print("✅ Done!")
    print("=" * 60)

    # Exit non-zero if the scrape failed or found nothing, so cron-level
    # alerting can pick it up.
    if result.get("error") or result.get("total_found", 0) == 0:
        sys.exit(1)

    sys.exit(0)


if __name__ == "__main__":
    main()