# pounce/backend/app/services/auction_scraper.py
"""
Domain Auction Scraper Service (Pounce)
Hard rules (project requirement):
- No mock/demo data.
- No estimated / placeholder auction prices.
- Store auctions only when we have real `current_bid` and a real `end_time`
(or a provider-provided time-left that can be converted deterministically).
Current data sources (works without scraping Cloudflare-protected providers):
- Dynadot: hidden JSON API (via `hidden_api_scraper`)
- ExpiredDomains provider auction pages (GoDaddy / Namecheap / Sedo):
include Price, Bids, Endtime
- Park.io: public auctions table includes Price, Bids, Close Date
- Sav: auctions table endpoint includes Price, Bids, Time left
Optional sources:
- DropCatch Partner API (if configured)
- Sedo Partner API (if configured)
- Playwright (opt-in) for Cloudflare-protected providers like NameJet
"""
import asyncio
import logging
import os
import re
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional
import httpx
from bs4 import BeautifulSoup
from sqlalchemy import and_, delete, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from zoneinfo import ZoneInfo
from app.models.auction import AuctionScrapeLog, DomainAuction
from app.services.dropcatch_api import dropcatch_client
from app.services.hidden_api_scrapers import build_affiliate_url, hidden_api_scraper
from app.services.sedo_api import sedo_client
try:
from app.services.playwright_scraper import playwright_scraper
PLAYWRIGHT_AVAILABLE = True
except ImportError:
playwright_scraper = None
PLAYWRIGHT_AVAILABLE = False
logger = logging.getLogger(__name__)
# Rate limiting: requests per minute per platform
RATE_LIMITS: Dict[str, int] = {
"ExpiredDomains": 5,
"Park.io": 10,
"Sav": 10,
"DropCatch": 10,
"Sedo": 10,
"NameJet": 5,
}
USER_AGENT = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
class AuctionScraperService:
"""
Orchestrates scraping across multiple sources and stores results in DB.
"""
def __init__(self):
self.http_client: Optional[httpx.AsyncClient] = None
self._last_request: Dict[str, datetime] = {}
async def _get_client(self) -> httpx.AsyncClient:
"""Get or create HTTP client with appropriate headers (and optional proxy)."""
if self.http_client is None or self.http_client.is_closed:
proxy = os.getenv("SCRAPER_HTTP_PROXY") or os.getenv("SCRAPER_PROXY_URL")
self.http_client = httpx.AsyncClient(
timeout=30.0,
follow_redirects=True,
proxy=proxy,
headers={
"User-Agent": USER_AGENT,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate",
"DNT": "1",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
},
)
return self.http_client
async def _rate_limit(self, platform: str):
"""Enforce rate limiting per platform."""
min_interval = 60 / RATE_LIMITS.get(platform, 10)
last = self._last_request.get(platform)
if last:
elapsed = (datetime.utcnow() - last).total_seconds()
if elapsed < min_interval:
await asyncio.sleep(min_interval - elapsed)
self._last_request[platform] = datetime.utcnow()
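    # Worked example of the throttle above (illustrative): RATE_LIMITS["ExpiredDomains"]
    # is 5 requests/minute, so min_interval is 60 / 5 = 12.0 seconds. If the previous
    # ExpiredDomains request was 4 seconds ago, _rate_limit sleeps for the remaining
    # 8 seconds before stamping the new request time.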
# ----------------------------
# Parsing & validation helpers
# ----------------------------
    def _parse_datetime(self, value: Any) -> Optional[datetime]:
        """Parse datetime from common API formats (ISO strings, epoch timestamps) into naive UTC."""
        if value is None:
            return None
        if isinstance(value, datetime):
            # Convert aware datetimes to UTC before dropping tzinfo so offsets are not silently discarded.
            if value.tzinfo is not None:
                value = value.astimezone(timezone.utc)
            return value.replace(tzinfo=None)
        if isinstance(value, (int, float)):
            try:
                return datetime.fromtimestamp(float(value), tz=timezone.utc).replace(tzinfo=None)
            except Exception:
                return None
        if isinstance(value, str):
            raw = value.strip()
            if not raw:
                return None
            try:
                parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
                if parsed.tzinfo is not None:
                    parsed = parsed.astimezone(timezone.utc)
                return parsed.replace(tzinfo=None)
            except Exception:
                return None
        return None
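    # A few inputs _parse_datetime handles (illustrative, not executed):
    #   "2025-01-31T18:00:00Z"       -> datetime(2025, 1, 31, 18, 0)  (naive UTC)
    #   "2025-01-31 18:00:00+02:00"  -> datetime(2025, 1, 31, 16, 0)  (offset converted to UTC)
    #   1738346400                   -> the same moment, given as epoch seconds
    #   "not a date"                 -> None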
def _to_float(self, value: Any) -> Optional[float]:
"""Parse float from strings like '$1,234.56'."""
if value is None:
return None
if isinstance(value, (int, float)):
return float(value)
if isinstance(value, str):
cleaned = value.strip().replace(",", "")
            cleaned = cleaned.replace("$", "").replace("€", "").replace("£", "")
if not cleaned:
return None
try:
return float(cleaned)
except Exception:
return None
return None
def _parse_price_currency(self, text: str) -> Optional[tuple[float, str]]:
"""Parse price strings like '7,100 USD' or '$530.00' into (price, currency)."""
if not text:
return None
raw = text.strip()
if not raw or raw.lower() in {"-", "n/a", "na"}:
return None
currency = "USD"
m_amount = re.search(r"([0-9][0-9,]*(?:\.[0-9]+)?)", raw)
if not m_amount:
return None
amount = self._to_float(m_amount.group(1))
if amount is None:
return None
m_cur = re.search(r"\b([A-Z]{3})\b", raw)
if m_cur:
currency = m_cur.group(1).upper()
elif "$" in raw:
currency = "USD"
elif "" in raw:
currency = "EUR"
elif "£" in raw:
currency = "GBP"
return float(amount), currency
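    # Examples of what _parse_price_currency returns (illustrative, not executed):
    #   "7,100 USD"  -> (7100.0, "USD")
    #   "$530.00"    -> (530.0, "USD")
    #   "€99"        -> (99.0, "EUR")
    #   "-" / "n/a"  -> None (no amount to extract)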
def _parse_timeleft(self, text: str) -> Optional[timedelta]:
"""
Parse relative time strings into a timedelta.
Supported examples:
- ExpiredDomains: '4d 20h 39m', '6m 48s', '23h 46m'
- Sav: '6D 2H'
"""
if not text:
return None
raw = text.strip().lower()
if not raw or raw in {"-", "n/a", "na", "ended"}:
return None
matches = re.findall(r"(\d+)\s*([dhms])", raw)
if not matches:
return None
total_seconds = 0
for amount_str, unit in matches:
try:
amount = int(amount_str)
except Exception:
return None
if unit == "d":
total_seconds += amount * 86400
elif unit == "h":
total_seconds += amount * 3600
elif unit == "m":
total_seconds += amount * 60
elif unit == "s":
total_seconds += amount
if total_seconds <= 0:
return None
return timedelta(seconds=total_seconds)
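    # Examples of what _parse_timeleft returns (illustrative, not executed):
    #   "4d 20h 39m" -> timedelta(days=4, hours=20, minutes=39)
    #   "6D 2H"      -> timedelta(days=6, hours=2)   (case-insensitive)
    #   "6m 48s"     -> timedelta(minutes=6, seconds=48)
    #   "ended"      -> None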
def _sanitize_auction_payload(self, auction_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""
Ensure we only store real, complete auctions.
Rules (strict):
- domain/platform/auction_url must be present
- current_bid must be > 0
- end_time must be parseable
- drop unknown keys (prevents accidental schema drift)
"""
if not isinstance(auction_data, dict):
return None
domain = str(auction_data.get("domain", "")).strip().lower()
platform = str(auction_data.get("platform", "")).strip()
auction_url = str(auction_data.get("auction_url", "")).strip()
if not domain or "." not in domain:
return None
if not platform:
return None
if not auction_url:
return None
tld = auction_data.get("tld") or domain.rsplit(".", 1)[-1]
tld = str(tld).strip().lower().lstrip(".")
if not tld:
return None
current_bid = self._to_float(auction_data.get("current_bid"))
if current_bid is None or current_bid <= 0:
return None
end_time = self._parse_datetime(auction_data.get("end_time"))
if end_time is None:
return None
try:
num_bids = int(auction_data.get("num_bids", 0) or 0)
except Exception:
return None
if num_bids < 0:
return None
# Normalize optional floats
min_bid = self._to_float(auction_data.get("min_bid"))
buy_now_price = self._to_float(auction_data.get("buy_now_price"))
reserve_price = self._to_float(auction_data.get("reserve_price"))
# Normalize watchers (optional)
try:
num_watchers = auction_data.get("num_watchers")
num_watchers = int(num_watchers) if num_watchers is not None else None
except Exception:
num_watchers = None
allowed = {c.name for c in DomainAuction.__table__.columns}
cleaned = {k: v for k, v in auction_data.items() if k in allowed}
cleaned.update(
{
"domain": domain,
"tld": tld,
"platform": platform,
"auction_url": auction_url,
"current_bid": float(current_bid),
"min_bid": float(min_bid) if min_bid is not None else None,
"buy_now_price": float(buy_now_price) if buy_now_price is not None else None,
"reserve_price": float(reserve_price) if reserve_price is not None else None,
"num_bids": num_bids,
"num_watchers": num_watchers,
"end_time": end_time,
"is_active": True,
}
)
# Persist pounce_score for DB-level sorting/filtering (Market feed)
try:
from app.services.pounce_score import calculate_pounce_score_v2
cleaned["pounce_score"] = calculate_pounce_score_v2(
domain,
tld,
num_bids=num_bids,
age_years=int(auction_data.get("age_years") or 0),
is_pounce=False,
)
except Exception:
# Score is optional; keep payload valid if anything goes wrong
cleaned["pounce_score"] = None
currency = cleaned.get("currency") or "USD"
cleaned["currency"] = str(currency).strip().upper()
return cleaned
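    # Sketch of a minimal payload that survives _sanitize_auction_payload (illustrative):
    #   {"domain": "Example.COM", "platform": "Sav",
    #    "auction_url": "https://www.sav.com/domains/auctions", "current_bid": "1,250",
    #    "num_bids": 3, "end_time": "2025-01-31T18:00:00Z"}
    # comes back with domain/tld lowercased ("example.com" / "com"), current_bid as
    # 1250.0, a parsed naive-UTC end_time, is_active=True, currency defaulted to "USD",
    # and a pounce_score (or None if scoring fails). A missing/zero bid, an unparseable
    # end_time, or a domain without a dot makes the method return None instead.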
async def _store_auction(self, db: AsyncSession, auction_data: Dict[str, Any]) -> str:
"""Store or update an auction in the database. Returns 'new', 'updated' or 'skipped'."""
cleaned = self._sanitize_auction_payload(auction_data)
if cleaned is None:
return "skipped"
# AsyncSessionLocal is configured with autoflush=False.
# Flush pending inserts/updates so the existence check can see them and we don't create duplicates.
await db.flush()
existing = await db.execute(
select(DomainAuction).where(
and_(
DomainAuction.domain == cleaned["domain"],
DomainAuction.platform == cleaned["platform"],
)
)
)
existing = existing.scalar_one_or_none()
if existing:
# Prevent "end_time drift" on sources that only provide rounded time-left.
# `end_time` must be monotonically decreasing (or stable) across scrapes.
try:
incoming_end = cleaned.get("end_time")
if isinstance(incoming_end, datetime) and existing.end_time:
# Allow tiny increases due to rounding/clock skew, but never extend materially.
tolerance = timedelta(minutes=2)
if incoming_end > existing.end_time + tolerance:
cleaned["end_time"] = existing.end_time
except Exception:
pass
for key, value in cleaned.items():
setattr(existing, key, value)
existing.updated_at = datetime.utcnow()
existing.is_active = True
return "updated"
try:
# Protect against concurrent inserts (e.g. cron overlap) when a unique index exists.
async with db.begin_nested():
db.add(DomainAuction(**cleaned))
await db.flush()
return "new"
except IntegrityError:
# Another transaction inserted the same (platform, domain) in the meantime.
existing = await db.execute(
select(DomainAuction).where(
and_(
DomainAuction.domain == cleaned["domain"],
DomainAuction.platform == cleaned["platform"],
)
)
)
existing = existing.scalar_one_or_none()
if not existing:
return "skipped"
for key, value in cleaned.items():
setattr(existing, key, value)
existing.updated_at = datetime.utcnow()
existing.is_active = True
return "updated"
# ----------------------------
# Source scrapers
# ----------------------------
async def _scrape_expireddomains_auction_page(
self,
db: AsyncSession,
platform: str,
url: str,
limit: int = 200,
) -> Dict[str, Any]:
"""Scrape ExpiredDomains provider-specific auction pages (real Price/Bids/Endtime)."""
result = {"found": 0, "new": 0, "updated": 0}
log = AuctionScrapeLog(platform=platform)
db.add(log)
await db.commit()
try:
await self._rate_limit("ExpiredDomains")
client = await self._get_client()
resp = await client.get(url, timeout=20.0)
if resp.status_code != 200:
raise Exception(f"HTTP {resp.status_code}")
soup = BeautifulSoup(resp.text, "lxml")
table = soup.select_one("table.base1")
if not table:
raise Exception("ExpiredDomains table not found")
headers = [th.get_text(" ", strip=True) for th in table.select("thead th")]
header_index = {h: i for i, h in enumerate(headers)}
required = ["Domain", "Price", "Bids", "Endtime"]
if not all(k in header_index for k in required):
raise Exception(f"Missing required columns: {required} in {headers}")
rows = table.select("tbody tr")
now = datetime.utcnow()
for row in rows[:limit]:
cols = row.find_all("td")
if len(cols) < len(headers):
continue
domain = cols[header_index["Domain"]].get_text(" ", strip=True).lower()
if not domain or "." not in domain:
continue
tld = domain.rsplit(".", 1)[-1].lower()
parsed_price = self._parse_price_currency(cols[header_index["Price"]].get_text(" ", strip=True))
if not parsed_price:
continue
current_bid, currency = parsed_price
if current_bid <= 0:
continue
bids_raw = cols[header_index["Bids"]].get_text(" ", strip=True)
try:
num_bids = int(re.sub(r"[^0-9]", "", bids_raw) or "0")
except Exception:
continue
end_raw = cols[header_index["Endtime"]].get_text(" ", strip=True)
delta = self._parse_timeleft(end_raw)
if not delta:
continue
end_time = now + delta
domain_link = cols[header_index["Domain"]].find("a")
href = domain_link.get("href") if domain_link else None
if href and href.startswith("/"):
href = f"https://www.expireddomains.net{href}"
auction_data = {
"domain": domain,
"tld": tld,
"platform": platform,
"platform_auction_id": None,
"auction_url": href or build_affiliate_url(platform, domain),
"current_bid": current_bid,
"currency": currency,
"num_bids": num_bids,
"end_time": end_time,
"scrape_source": f"expireddomains:{url}",
}
status = await self._store_auction(db, auction_data)
if status == "skipped":
continue
result["found"] += 1
result[status] += 1
await db.commit()
log.completed_at = datetime.utcnow()
log.status = "success"
log.auctions_found = result["found"]
log.auctions_new = result["new"]
log.auctions_updated = result["updated"]
await db.commit()
except Exception as e:
log.completed_at = datetime.utcnow()
log.status = "failed"
log.error_message = str(e)[:500]
await db.commit()
logger.error(f"ExpiredDomains({platform}) scrape failed: {e}")
return result
async def _scrape_expireddomains_godaddy(self, db: AsyncSession) -> Dict[str, Any]:
return await self._scrape_expireddomains_auction_page(
db=db,
platform="GoDaddy",
url="https://www.expireddomains.net/godaddy-domain-auctions-with-bids/",
)
async def _scrape_expireddomains_namecheap(self, db: AsyncSession) -> Dict[str, Any]:
return await self._scrape_expireddomains_auction_page(
db=db,
platform="Namecheap",
url="https://www.expireddomains.net/namecheap-auction-domains/",
)
async def _scrape_expireddomains_sedo(self, db: AsyncSession) -> Dict[str, Any]:
return await self._scrape_expireddomains_auction_page(
db=db,
platform="Sedo",
url="https://www.expireddomains.net/sedo-auction-domains/",
)
async def _scrape_parkio_public(self, db: AsyncSession) -> Dict[str, Any]:
"""Scrape Park.io public auctions page (includes price + close date)."""
platform = "Park.io"
result = {"found": 0, "new": 0, "updated": 0}
log = AuctionScrapeLog(platform=platform)
db.add(log)
await db.commit()
try:
await self._rate_limit(platform)
client = await self._get_client()
resp = await client.get("https://park.io/auctions", timeout=20.0)
if resp.status_code != 200:
raise Exception(f"HTTP {resp.status_code}")
soup = BeautifulSoup(resp.text, "lxml")
table = soup.select_one("table.table")
if not table:
raise Exception("Park.io table not found")
rows = table.select("tbody tr")
for row in rows[:200]:
cols = row.find_all("td")
if len(cols) < 5:
continue
domain = cols[1].get_text(" ", strip=True).lower()
if not domain or "." not in domain:
continue
tld = domain.rsplit(".", 1)[-1].lower()
parsed_price = self._parse_price_currency(cols[2].get_text(" ", strip=True))
if not parsed_price:
continue
current_bid, currency = parsed_price
if current_bid <= 0:
continue
bids_raw = cols[3].get_text(" ", strip=True)
try:
num_bids = int(re.sub(r"[^0-9]", "", bids_raw) or "0")
except Exception:
continue
close_raw = cols[4].get_text(" ", strip=True)
try:
# Park.io displays a naive timestamp in their platform timezone.
# Default timezone is America/New_York (configurable).
tz_name = os.getenv("PARKIO_TIMEZONE", "America/New_York")
tz = ZoneInfo(tz_name)
local_dt = datetime.strptime(close_raw, "%Y-%m-%d %H:%M:%S").replace(tzinfo=tz)
end_time = local_dt.astimezone(ZoneInfo("UTC")).replace(tzinfo=None)
except Exception:
continue
link_el = cols[1].find("a", href=True)
href = link_el["href"] if link_el else None
if href and href.startswith("/"):
href = f"https://park.io{href}"
auction_data = {
"domain": domain,
"tld": tld,
"platform": platform,
"auction_url": href or "https://park.io/auctions",
"current_bid": current_bid,
"currency": currency,
"num_bids": num_bids,
"end_time": end_time,
"scrape_source": "park.io:auctions",
}
status = await self._store_auction(db, auction_data)
if status == "skipped":
continue
result["found"] += 1
result[status] += 1
await db.commit()
log.completed_at = datetime.utcnow()
log.status = "success"
log.auctions_found = result["found"]
log.auctions_new = result["new"]
log.auctions_updated = result["updated"]
await db.commit()
except Exception as e:
log.completed_at = datetime.utcnow()
log.status = "failed"
log.error_message = str(e)[:500]
await db.commit()
logger.error(f"Park.io scrape failed: {e}")
return result
async def _scrape_sav_public(self, db: AsyncSession) -> Dict[str, Any]:
"""Scrape Sav auctions from their HTML table endpoint."""
platform = "Sav"
result = {"found": 0, "new": 0, "updated": 0}
log = AuctionScrapeLog(platform=platform)
db.add(log)
await db.commit()
try:
await self._rate_limit(platform)
client = await self._get_client()
now = datetime.utcnow()
for page in range(0, 3):
resp = await client.post(
f"https://www.sav.com/auctions/load_domains_ajax/{page}",
headers={"X-Requested-With": "XMLHttpRequest"},
timeout=20.0,
)
if resp.status_code != 200:
continue
soup = BeautifulSoup(resp.text, "html.parser")
rows = soup.select("tr")
if not rows:
continue
for row in rows[:200]:
cells = row.find_all("td")
if len(cells) < 7:
continue
domain_link = cells[1].find("a")
domain = domain_link.get_text(" ", strip=True).lower() if domain_link else ""
if not domain or "." not in domain:
continue
tld = domain.rsplit(".", 1)[-1].lower()
parsed_price = self._parse_price_currency(cells[2].get_text(" ", strip=True))
if not parsed_price:
continue
current_bid, currency = parsed_price
if current_bid <= 0:
continue
bids_raw = cells[3].get_text(" ", strip=True)
try:
num_bids = int(re.sub(r"[^0-9]", "", bids_raw) or "0")
except Exception:
continue
time_left_raw = cells[6].get_text(" ", strip=True)
delta = self._parse_timeleft(time_left_raw)
if not delta:
continue
end_time = now + delta
href = domain_link.get("href") if domain_link else None
if href and href.startswith("/"):
href = f"https://www.sav.com{href}"
auction_data = {
"domain": domain,
"tld": tld,
"platform": platform,
"auction_url": href or "https://www.sav.com/domains/auctions",
"current_bid": current_bid,
"currency": currency,
"num_bids": num_bids,
"end_time": end_time,
"scrape_source": f"sav:load_domains_ajax:{page}",
}
status = await self._store_auction(db, auction_data)
if status == "skipped":
continue
result["found"] += 1
result[status] += 1
await asyncio.sleep(1)
await db.commit()
log.completed_at = datetime.utcnow()
log.status = "success"
log.auctions_found = result["found"]
log.auctions_new = result["new"]
log.auctions_updated = result["updated"]
await db.commit()
except Exception as e:
log.completed_at = datetime.utcnow()
log.status = "failed"
log.error_message = str(e)[:500]
await db.commit()
logger.error(f"Sav scrape failed: {e}")
return result
# ----------------------------
# Orchestration
# ----------------------------
async def scrape_all_platforms(self, db: AsyncSession) -> Dict[str, Any]:
"""Scrape all configured sources and store results in DB."""
results = {
"total_found": 0,
"total_new": 0,
"total_updated": 0,
"platforms": {},
"errors": [],
}
def _touch_platform(platform: str):
if platform not in results["platforms"]:
results["platforms"][platform] = {"found": 0, "new": 0, "updated": 0}
# TIER 0: Hidden APIs (Dynadot, etc.)
try:
hidden_api_result = await hidden_api_scraper.scrape_all(limit_per_platform=100)
for item in hidden_api_result.get("items", []):
action = await self._store_auction(db, item)
if action == "skipped":
continue
platform = item.get("platform", "Unknown")
_touch_platform(platform)
results["platforms"][platform]["found"] += 1
results["total_found"] += 1
if action == "new":
results["platforms"][platform]["new"] += 1
results["total_new"] += 1
elif action == "updated":
results["platforms"][platform]["updated"] += 1
results["total_updated"] += 1
if hidden_api_result.get("errors"):
for error in hidden_api_result["errors"]:
results["errors"].append(f"Hidden API: {error}")
except Exception as e:
results["errors"].append(f"Hidden APIs: {str(e)}")
await db.commit()
# TIER 1: Official Partner APIs (if configured)
for platform_name, api_func in [("DropCatch", self._fetch_dropcatch_api), ("Sedo", self._fetch_sedo_api)]:
try:
api_result = await api_func(db)
if api_result.get("found", 0) > 0:
results["platforms"][platform_name] = api_result
results["total_found"] += api_result.get("found", 0)
results["total_new"] += api_result.get("new", 0)
results["total_updated"] += api_result.get("updated", 0)
except Exception as e:
results["errors"].append(f"{platform_name} API: {str(e)}")
# TIER 2: Web scraping (non-Cloudflare, or via ExpiredDomains provider pages)
scrapers = [
("GoDaddy", self._scrape_expireddomains_godaddy),
("Namecheap", self._scrape_expireddomains_namecheap),
("Sedo", self._scrape_expireddomains_sedo),
("Park.io", self._scrape_parkio_public),
("Sav", self._scrape_sav_public),
]
for platform_name, fn in scrapers:
try:
r = await fn(db)
results["platforms"][platform_name] = r
results["total_found"] += r.get("found", 0)
results["total_new"] += r.get("new", 0)
results["total_updated"] += r.get("updated", 0)
except Exception as e:
results["errors"].append(f"{platform_name}: {str(e)}")
# TIER 3: Playwright (opt-in)
playwright_enabled = os.getenv("POUNCE_ENABLE_PROTECTED_SCRAPERS", "false").lower() in ("1", "true", "yes")
if PLAYWRIGHT_AVAILABLE and playwright_scraper and playwright_enabled:
try:
playwright_result = await playwright_scraper.scrape_all_protected()
for item in playwright_result.get("items", []):
action = await self._store_auction(db, item)
if action == "skipped":
continue
platform = item.get("platform", "Unknown")
_touch_platform(platform)
results["platforms"][platform]["found"] += 1
results["total_found"] += 1
if action == "new":
results["platforms"][platform]["new"] += 1
results["total_new"] += 1
elif action == "updated":
results["platforms"][platform]["updated"] += 1
results["total_updated"] += 1
if playwright_result.get("errors"):
for error in playwright_result["errors"]:
results["errors"].append(f"Playwright: {error}")
except Exception as e:
results["errors"].append(f"Playwright: {str(e)}")
await db.commit()
await self._cleanup_ended_auctions(db)
return results
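    # Shape of the dict returned by scrape_all_platforms (illustrative values only):
    #   {"total_found": 412, "total_new": 57, "total_updated": 355,
    #    "platforms": {"GoDaddy": {"found": 180, "new": 21, "updated": 159}, ...},
    #    "errors": ["Sedo API: timeout", ...]}
    # Tier 1 entries additionally carry "source": "api" (see _fetch_dropcatch_api below).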
# ----------------------------
# Tier 1 helpers (official APIs)
# ----------------------------
async def _fetch_dropcatch_api(self, db: AsyncSession) -> Dict[str, Any]:
platform = "DropCatch"
result = {"found": 0, "new": 0, "updated": 0, "source": "api"}
if not dropcatch_client.is_configured:
return result
log = AuctionScrapeLog(platform=platform)
db.add(log)
await db.commit()
try:
api_result = await dropcatch_client.search_auctions(page_size=100)
auctions = api_result.get("auctions") or api_result.get("items") or []
result["found"] = len(auctions)
for dc_auction in auctions:
auction_data = dropcatch_client.transform_to_pounce_format(dc_auction)
status = await self._store_auction(db, auction_data)
if status == "skipped":
continue
result[status] += 1
await db.commit()
log.status = "success"
log.auctions_found = result["found"]
log.auctions_new = result["new"]
log.auctions_updated = result["updated"]
log.completed_at = datetime.utcnow()
await db.commit()
except Exception as e:
log.status = "failed"
log.error_message = str(e)[:500]
log.completed_at = datetime.utcnow()
await db.commit()
return result
async def _fetch_sedo_api(self, db: AsyncSession) -> Dict[str, Any]:
platform = "Sedo"
result = {"found": 0, "new": 0, "updated": 0, "source": "api"}
if not sedo_client.is_configured:
return result
log = AuctionScrapeLog(platform=platform)
db.add(log)
await db.commit()
try:
api_result = await sedo_client.search_auctions(page_size=100)
listings = api_result.get("domains") or api_result.get("items") or api_result.get("result") or []
if isinstance(listings, dict):
listings = list(listings.values()) if listings else []
result["found"] = len(listings)
for sedo_listing in listings:
auction_data = sedo_client.transform_to_pounce_format(sedo_listing)
status = await self._store_auction(db, auction_data)
if status == "skipped":
continue
result[status] += 1
await db.commit()
log.status = "success"
log.auctions_found = result["found"]
log.auctions_new = result["new"]
log.auctions_updated = result["updated"]
log.completed_at = datetime.utcnow()
await db.commit()
except Exception as e:
log.status = "failed"
log.error_message = str(e)[:500]
log.completed_at = datetime.utcnow()
await db.commit()
return result
# ----------------------------
# DB cleanup / queries
# ----------------------------
async def _cleanup_ended_auctions(self, db: AsyncSession):
"""Mark auctions that have ended as inactive and delete very old inactive auctions."""
now = datetime.utcnow()
from sqlalchemy import update
await db.execute(
update(DomainAuction)
.where(and_(DomainAuction.end_time < now, DomainAuction.is_active == True))
.values(is_active=False)
)
cutoff = now - timedelta(days=30)
await db.execute(
delete(DomainAuction).where(and_(DomainAuction.is_active == False, DomainAuction.end_time < cutoff))
)
await db.commit()
async def get_active_auctions(
self,
db: AsyncSession,
platform: Optional[str] = None,
tld: Optional[str] = None,
keyword: Optional[str] = None,
min_bid: Optional[float] = None,
max_bid: Optional[float] = None,
ending_within_hours: Optional[int] = None,
sort_by: str = "end_time",
limit: int = 50,
offset: int = 0,
) -> List[DomainAuction]:
"""Get active auctions from database with filters."""
query = select(DomainAuction).where(DomainAuction.is_active == True)
if platform:
query = query.where(DomainAuction.platform == platform)
if tld:
query = query.where(DomainAuction.tld == tld.lower().lstrip("."))
if keyword:
query = query.where(DomainAuction.domain.ilike(f"%{keyword}%"))
if min_bid is not None:
query = query.where(DomainAuction.current_bid >= min_bid)
if max_bid is not None:
query = query.where(DomainAuction.current_bid <= max_bid)
if ending_within_hours:
cutoff = datetime.utcnow() + timedelta(hours=ending_within_hours)
query = query.where(DomainAuction.end_time <= cutoff)
if sort_by == "end_time":
query = query.order_by(DomainAuction.end_time.asc())
elif sort_by == "bid_asc":
query = query.order_by(DomainAuction.current_bid.asc())
elif sort_by == "bid_desc":
query = query.order_by(DomainAuction.current_bid.desc())
elif sort_by == "bids":
query = query.order_by(DomainAuction.num_bids.desc())
result = await db.execute(query.offset(offset).limit(limit))
return list(result.scalars().all())
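    # Example read query (a sketch; assumes `db` is an open AsyncSession): the 20
    # cheapest active .io auctions closing within the next 24 hours.
    #
    #   auctions = await auction_scraper.get_active_auctions(
    #       db, tld="io", ending_within_hours=24, sort_by="bid_asc", limit=20
    #   )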
async def get_auction_count(self, db: AsyncSession) -> int:
"""Get total count of active auctions."""
from sqlalchemy import func
result = await db.execute(select(func.count(DomainAuction.id)).where(DomainAuction.is_active == True))
return result.scalar() or 0
async def close(self):
"""Close HTTP client."""
if self.http_client and not self.http_client.is_closed:
await self.http_client.aclose()
# Global instance
auction_scraper = AuctionScraperService()
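# Minimal manual-run sketch (an assumption, not part of the service contract): the
# session factory import below is hypothetical — point it at this project's actual
# async sessionmaker (the code above only assumes it is configured with autoflush=False).
if __name__ == "__main__":
    async def _run_once() -> None:
        from app.db.session import AsyncSessionLocal  # hypothetical import path

        async with AsyncSessionLocal() as db:
            results = await auction_scraper.scrape_all_platforms(db)
            print(
                f"found={results['total_found']} new={results['total_new']} "
                f"updated={results['total_updated']} errors={len(results['errors'])}"
            )
        await auction_scraper.close()

    asyncio.run(_run_once())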