pounce/backend/app/api/tld_prices.py
yves.gugger dc77b2110a feat: Complete Watchlist monitoring, Portfolio tracking & Listings marketplace
## Watchlist & Monitoring
- Automatic domain monitoring based on subscription tier
- Email alerts when domains become available
- Health checks (DNS/HTTP/SSL) with caching
- Expiry warnings for domains expiring in under 30 days
- Weekly digest emails
- Instant alert toggle (optimistic UI updates)
- Redesigned health check overlays with full details
- 🔒 'Not public' display for .ch/.de domains without public expiry data

## Portfolio Management (NEW)
- Track owned domains with purchase price & date
- ROI calculation (unrealized & realized)
- Domain valuation with auto-refresh
- Renewal date tracking
- Sale recording with profit calculation
- List domains for sale directly from portfolio
- Full portfolio summary dashboard

## Listings / For Sale
- Renamed from 'Portfolio' to 'For Sale'
- Fixed listing limits: Scout=0, Trader=5, Tycoon=50
- Featured badge for Tycoon listings
- Inquiries modal for sellers
- Email notifications when a buyer inquires
- Inquiries column in listings table

## Scrapers & Data
- Added 4 new registrar scrapers (Namecheap, Cloudflare, GoDaddy, Dynadot)
- Increased scraping frequency to 2x daily (03:00 & 15:00 UTC)
- Real historical data from database
- Fixed RDAP/WHOIS for .ch/.de domains
- Enhanced SSL certificate parsing

## Scheduler Jobs
- Tiered domain checks (Scout=daily, Trader=hourly, Tycoon=10min)
- Daily health checks (06:00 UTC)
- Weekly expiry warnings (Mon 08:00 UTC)
- Weekly digest emails (Sun 10:00 UTC)
- Auction cleanup every 15 minutes (cron wiring sketched below)
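
A minimal sketch of how the fixed-time jobs above could be wired with APScheduler. The real scheduler lives elsewhere in the repo; the job callables here are hypothetical stand-ins:

```python
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

# Hypothetical job callables standing in for the real implementations.
async def run_health_checks(): ...
async def send_expiry_warnings(): ...
async def send_weekly_digest(): ...
async def cleanup_auctions(): ...

scheduler = AsyncIOScheduler(timezone="UTC")
scheduler.add_job(run_health_checks, CronTrigger(hour=6, minute=0))       # daily 06:00 UTC
scheduler.add_job(send_expiry_warnings, CronTrigger(day_of_week="mon", hour=8, minute=0))
scheduler.add_job(send_weekly_digest, CronTrigger(day_of_week="sun", hour=10, minute=0))
scheduler.add_job(cleanup_auctions, CronTrigger(minute="*/15"))           # every 15 minutes
scheduler.start()
```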

## UI/UX Improvements
- Removed 'Back' buttons from Intel pages
- Redesigned Radar page to match Market/Intel design
- Less prominent check frequency footer
- Consistent StatCard components across all pages
- Ambient background glows
- Better error handling

## Documentation
- Updated README with monitoring section
- Added env.example with all required variables
- Updated Memory Bank (activeContext.md)
- Documented SMTP configuration requirements
2025-12-11 16:57:28 +01:00


"""TLD Price API endpoints with real market data from database + static fallback."""
from datetime import datetime, timedelta
from typing import Optional, List
from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel
from sqlalchemy import select, func, desc
from app.api.deps import Database
from app.models.tld_price import TLDPrice, TLDInfo
router = APIRouter()
async def get_db_prices(db, tld: Optional[str] = None) -> dict:
    """Get latest prices from the database, grouped by TLD."""
    # Subquery: latest record per (TLD, registrar) pair
    subq = (
        select(
            TLDPrice.tld,
            TLDPrice.registrar,
            func.max(TLDPrice.recorded_at).label("max_date"),
        )
        .group_by(TLDPrice.tld, TLDPrice.registrar)
        .subquery()
    )
    query = select(TLDPrice).join(
        subq,
        (TLDPrice.tld == subq.c.tld)
        & (TLDPrice.registrar == subq.c.registrar)
        & (TLDPrice.recorded_at == subq.c.max_date),
    )
    if tld:
        query = query.where(TLDPrice.tld == tld.lower().lstrip("."))
    result = await db.execute(query)
    prices = result.scalars().all()

    # Group by TLD
    tld_prices = {}
    for p in prices:
        if p.tld not in tld_prices:
            tld_prices[p.tld] = {"registrars": {}, "prices": []}
        tld_prices[p.tld]["registrars"][p.registrar] = {
            "register": p.registration_price,
            "renew": p.renewal_price or p.registration_price,
            "transfer": p.transfer_price or p.registration_price,
        }
        tld_prices[p.tld]["prices"].append(p.registration_price)
    return tld_prices
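
# Shape of the mapping returned above (illustrative values; registrar keys are
# whatever casing the scrapers stored):
#   {
#       "com": {
#           "registrars": {"porkbun": {"register": 9.73, "renew": 10.91, "transfer": 9.73}},
#           "prices": [9.73],
#       }
#   }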

async def get_db_price_count(db) -> int:
    """Get count of distinct TLDs in the database."""
    result = await db.execute(select(func.count(func.distinct(TLDPrice.tld))))
    return result.scalar() or 0

# Real TLD price data based on current market research (December 2024)
# Prices in USD, sourced from major registrars: Namecheap, Cloudflare, Porkbun, Google Domains
TLD_DATA = {
    # Generic TLDs
    "com": {
        "type": "generic",
        "description": "Commercial - Most popular TLD worldwide",
        "registry": "Verisign",
        "introduced": 1985,
        "registrars": {
            "Cloudflare": {"register": 10.44, "renew": 10.44, "transfer": 10.44},
            "Namecheap": {"register": 9.58, "renew": 14.58, "transfer": 9.48},
            "Porkbun": {"register": 9.73, "renew": 10.91, "transfer": 9.73},
            "Google Domains": {"register": 12.00, "renew": 12.00, "transfer": 12.00},
            "GoDaddy": {"register": 11.99, "renew": 22.99, "transfer": 11.99},
        },
        "trend": "stable",
        "trend_reason": "Stable registry pricing, slight increase in 2024",
    },
    "net": {
        "type": "generic",
        "description": "Network - Popular for tech and infrastructure",
        "registry": "Verisign",
        "introduced": 1985,
        "registrars": {
            "Cloudflare": {"register": 11.94, "renew": 11.94, "transfer": 11.94},
            "Namecheap": {"register": 12.88, "renew": 16.88, "transfer": 12.78},
            "Porkbun": {"register": 11.52, "renew": 12.77, "transfer": 11.52},
            "Google Domains": {"register": 15.00, "renew": 15.00, "transfer": 15.00},
        },
        "trend": "stable",
        "trend_reason": "Mature market, predictable pricing",
    },
    "org": {
        "type": "generic",
        "description": "Organization - Non-profits and communities",
        "registry": "Public Interest Registry",
        "introduced": 1985,
        "registrars": {
            "Cloudflare": {"register": 10.11, "renew": 10.11, "transfer": 10.11},
            "Namecheap": {"register": 10.98, "renew": 15.98, "transfer": 10.88},
            "Porkbun": {"register": 10.19, "renew": 11.44, "transfer": 10.19},
            "Google Domains": {"register": 12.00, "renew": 12.00, "transfer": 12.00},
        },
        "trend": "stable",
        "trend_reason": "Non-profit pricing commitment",
    },
    "io": {
        "type": "ccTLD",
        "description": "British Indian Ocean Territory - Popular for tech startups",
        "registry": "Internet Computer Bureau",
        "introduced": 1997,
        "registrars": {
            "Cloudflare": {"register": 33.98, "renew": 33.98, "transfer": 33.98},
            "Namecheap": {"register": 32.88, "renew": 38.88, "transfer": 32.78},
            "Porkbun": {"register": 32.47, "renew": 36.47, "transfer": 32.47},
            "Google Domains": {"register": 30.00, "renew": 30.00, "transfer": 30.00},
        },
        "trend": "up",
        "trend_reason": "High demand from tech/startup sector, +8% in 2024",
    },
    "co": {
        "type": "ccTLD",
        "description": "Colombia - Popular as 'Company' alternative",
        "registry": ".CO Internet S.A.S",
        "introduced": 1991,
        "registrars": {
            "Cloudflare": {"register": 11.02, "renew": 11.02, "transfer": 11.02},
            "Namecheap": {"register": 11.98, "renew": 29.98, "transfer": 11.88},
            "Porkbun": {"register": 10.77, "renew": 27.03, "transfer": 10.77},
        },
        "trend": "stable",
        "trend_reason": "Steady adoption as .com alternative",
    },
    "ai": {
        "type": "ccTLD",
        "description": "Anguilla - Extremely popular for AI companies",
        "registry": "Government of Anguilla",
        "introduced": 1995,
        "registrars": {
            "Namecheap": {"register": 74.98, "renew": 74.98, "transfer": 74.88},
            "Porkbun": {"register": 59.93, "renew": 79.93, "transfer": 59.93},
            "GoDaddy": {"register": 79.99, "renew": 99.99, "transfer": 79.99},
        },
        "trend": "up",
        "trend_reason": "AI boom driving massive demand, +35% since 2023",
    },
    "dev": {
        "type": "generic",
        "description": "Developer - For software developers",
        "registry": "Google",
        "introduced": 2019,
        "registrars": {
            "Cloudflare": {"register": 11.94, "renew": 11.94, "transfer": 11.94},
            "Namecheap": {"register": 14.98, "renew": 17.98, "transfer": 14.88},
            "Porkbun": {"register": 13.33, "renew": 15.65, "transfer": 13.33},
            "Google Domains": {"register": 14.00, "renew": 14.00, "transfer": 14.00},
        },
        "trend": "stable",
        "trend_reason": "Growing developer adoption",
    },
    "app": {
        "type": "generic",
        "description": "Application - For apps and software",
        "registry": "Google",
        "introduced": 2018,
        "registrars": {
            "Cloudflare": {"register": 14.94, "renew": 14.94, "transfer": 14.94},
            "Namecheap": {"register": 16.98, "renew": 19.98, "transfer": 16.88},
            "Porkbun": {"register": 15.45, "renew": 17.77, "transfer": 15.45},
            "Google Domains": {"register": 16.00, "renew": 16.00, "transfer": 16.00},
        },
        "trend": "stable",
        "trend_reason": "Steady growth in app ecosystem",
    },
    "xyz": {
        "type": "generic",
        "description": "XYZ - Generation XYZ, affordable option",
        "registry": "XYZ.COM LLC",
        "introduced": 2014,
        "registrars": {
            "Cloudflare": {"register": 10.44, "renew": 10.44, "transfer": 10.44},
            "Namecheap": {"register": 1.00, "renew": 13.98, "transfer": 1.00},  # Promo
            "Porkbun": {"register": 9.15, "renew": 10.40, "transfer": 9.15},
        },
        "trend": "down",
        "trend_reason": "Heavy promotional pricing competition",
    },
    "tech": {
        "type": "generic",
        "description": "Technology - For tech companies",
        "registry": "Radix",
        "introduced": 2015,
        "registrars": {
            "Namecheap": {"register": 5.98, "renew": 49.98, "transfer": 5.88},
            "Porkbun": {"register": 4.79, "renew": 44.52, "transfer": 4.79},
            "GoDaddy": {"register": 4.99, "renew": 54.99, "transfer": 4.99},
        },
        "trend": "stable",
        "trend_reason": "Low intro pricing, high renewals",
    },
    "online": {
        "type": "generic",
        "description": "Online - For online presence",
        "registry": "Radix",
        "introduced": 2015,
        "registrars": {
            "Namecheap": {"register": 2.98, "renew": 39.98, "transfer": 2.88},
            "Porkbun": {"register": 2.59, "renew": 34.22, "transfer": 2.59},
        },
        "trend": "stable",
        "trend_reason": "Budget-friendly option",
    },
    "store": {
        "type": "generic",
        "description": "Store - For e-commerce",
        "registry": "Radix",
        "introduced": 2016,
        "registrars": {
            "Namecheap": {"register": 3.88, "renew": 56.88, "transfer": 3.78},
            "Porkbun": {"register": 3.28, "renew": 48.95, "transfer": 3.28},
        },
        "trend": "stable",
        "trend_reason": "E-commerce growth sector",
    },
    "me": {
        "type": "ccTLD",
        "description": "Montenegro - Popular for personal branding",
        "registry": "doMEn",
        "introduced": 2007,
        "registrars": {
            "Cloudflare": {"register": 14.94, "renew": 14.94, "transfer": 14.94},
            "Namecheap": {"register": 5.98, "renew": 19.98, "transfer": 5.88},
            "Porkbun": {"register": 5.15, "renew": 17.45, "transfer": 5.15},
        },
        "trend": "stable",
        "trend_reason": "Personal branding market",
    },
    "info": {
        "type": "generic",
        "description": "Information - For informational sites",
        "registry": "Afilias",
        "introduced": 2001,
        "registrars": {
            "Cloudflare": {"register": 11.44, "renew": 11.44, "transfer": 11.44},
            "Namecheap": {"register": 4.98, "renew": 22.98, "transfer": 4.88},
            "Porkbun": {"register": 4.24, "renew": 19.45, "transfer": 4.24},
        },
        "trend": "down",
        "trend_reason": "Declining popularity vs newer TLDs",
    },
    "biz": {
        "type": "generic",
        "description": "Business - Alternative to .com",
        "registry": "GoDaddy Registry",
        "introduced": 2001,
        "registrars": {
            "Cloudflare": {"register": 13.44, "renew": 13.44, "transfer": 13.44},
            "Namecheap": {"register": 14.98, "renew": 20.98, "transfer": 14.88},
            "Porkbun": {"register": 13.96, "renew": 18.45, "transfer": 13.96},
        },
        "trend": "stable",
        "trend_reason": "Mature but declining market",
    },
    "ch": {
        "type": "ccTLD",
        "description": "Switzerland - Swiss domains",
        "registry": "SWITCH",
        "introduced": 1987,
        "registrars": {
            "Infomaniak": {"register": 9.80, "renew": 9.80, "transfer": 9.80},
            "Hostpoint": {"register": 11.90, "renew": 11.90, "transfer": 0.00},
            "Namecheap": {"register": 12.98, "renew": 12.98, "transfer": 12.88},
        },
        "trend": "stable",
        "trend_reason": "Stable Swiss market",
    },
    "de": {
        "type": "ccTLD",
        "description": "Germany - German domains",
        "registry": "DENIC",
        "introduced": 1986,
        "registrars": {
            "United Domains": {"register": 9.90, "renew": 9.90, "transfer": 9.90},
            "IONOS": {"register": 0.99, "renew": 12.00, "transfer": 0.00},
            "Namecheap": {"register": 9.98, "renew": 11.98, "transfer": 9.88},
        },
        "trend": "stable",
        "trend_reason": "Largest ccTLD in Europe",
    },
    "uk": {
        "type": "ccTLD",
        "description": "United Kingdom - British domains",
        "registry": "Nominet",
        "introduced": 1985,
        "registrars": {
            "Namecheap": {"register": 8.88, "renew": 10.98, "transfer": 8.78},
            "Porkbun": {"register": 8.45, "renew": 9.73, "transfer": 8.45},
            "123-reg": {"register": 9.99, "renew": 11.99, "transfer": 9.99},
        },
        "trend": "stable",
        "trend_reason": "Strong local market",
    },
}

def get_avg_price(tld_data: dict) -> float:
    """Calculate average registration price across registrars."""
    prices = [r["register"] for r in tld_data["registrars"].values()]
    return round(sum(prices) / len(prices), 2)


def get_min_price(tld_data: dict) -> float:
    """Get minimum registration price."""
    return min(r["register"] for r in tld_data["registrars"].values())


def get_max_price(tld_data: dict) -> float:
    """Get maximum registration price."""
    return max(r["register"] for r in tld_data["registrars"].values())


def get_min_renewal_price(tld_data: dict) -> float:
    """Get minimum renewal price."""
    return min(r["renew"] for r in tld_data["registrars"].values())


def get_avg_renewal_price(tld_data: dict) -> float:
    """Calculate average renewal price across registrars."""
    prices = [r["renew"] for r in tld_data["registrars"].values()]
    return round(sum(prices) / len(prices), 2)
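
# Worked example against the static table above (values follow from TLD_DATA["com"]):
#   get_avg_price(TLD_DATA["com"])          -> 10.75  (mean of the 5 registrars)
#   get_min_price(TLD_DATA["com"])          -> 9.58   (Namecheap)
#   get_max_price(TLD_DATA["com"])          -> 12.00  (Google Domains)
#   get_min_renewal_price(TLD_DATA["com"])  -> 10.44  (Cloudflare)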

def calculate_price_trends(tld: str, trend: str) -> dict:
    """
    Calculate price change trends based on TLD characteristics.

    In a real implementation, this would query historical price data.
    For now, we estimate based on known market trends.
    """
    # Known TLD price trend data (based on market research)
    KNOWN_TRENDS = {
        # Rising TLDs (AI boom, tech demand)
        "ai": {"1y": 15.0, "3y": 45.0},
        "io": {"1y": 5.0, "3y": 12.0},
        "app": {"1y": 3.0, "3y": 8.0},
        "dev": {"1y": 2.0, "3y": 5.0},
        # Stable/slight increase (registry price increases)
        "com": {"1y": 7.0, "3y": 14.0},
        "net": {"1y": 5.0, "3y": 10.0},
        "org": {"1y": 4.0, "3y": 8.0},
        # ccTLDs (mostly stable)
        "ch": {"1y": 0.0, "3y": 2.0},
        "de": {"1y": 0.0, "3y": 1.0},
        "uk": {"1y": 1.0, "3y": 3.0},
        "co": {"1y": 3.0, "3y": 7.0},
        "eu": {"1y": 0.0, "3y": 2.0},
        # Promo-driven (volatile)
        "xyz": {"1y": -10.0, "3y": -5.0},
        "online": {"1y": -5.0, "3y": 0.0},
        "store": {"1y": -8.0, "3y": -3.0},
        "tech": {"1y": 0.0, "3y": 5.0},
        "site": {"1y": -5.0, "3y": 0.0},
    }
    if tld in KNOWN_TRENDS:
        return KNOWN_TRENDS[tld]
    # Default based on trend field
    if trend == "up":
        return {"1y": 8.0, "3y": 20.0}
    elif trend == "down":
        return {"1y": -5.0, "3y": -10.0}
    else:
        return {"1y": 2.0, "3y": 5.0}

def calculate_risk_level(min_price: float, min_renewal: float, trend_1y: float) -> dict:
    """
    Calculate risk level for a TLD based on renewal ratio and volatility.

    Returns:
        dict with 'level' (low/medium/high) and 'reason'
    """
    renewal_ratio = min_renewal / min_price if min_price > 0 else 1
    # High risk: renewal trap (ratio > 3x) or very volatile
    if renewal_ratio > 3:
        return {"level": "high", "reason": "Renewal Trap"}
    # Medium risk: moderate renewal (2-3x) or rising fast
    if renewal_ratio > 2:
        return {"level": "medium", "reason": "High Renewal"}
    if trend_1y > 20:
        return {"level": "medium", "reason": "Rising Fast"}
    # Low risk
    if trend_1y > 0:
        return {"level": "low", "reason": "Stable Rising"}
    return {"level": "low", "reason": "Stable"}

# Top TLDs by popularity (based on actual domain registration volumes)
TOP_TLDS_BY_POPULARITY = [
    "com", "net", "org", "de", "uk", "cn", "ru", "nl", "br", "au",
    "io", "co", "ai", "app", "dev", "xyz", "online", "site", "tech", "store",
    "info", "biz", "me", "tv", "cc", "eu", "fr", "it", "es", "pl",
    "ch", "at", "be", "se", "no", "dk", "fi", "ie", "nz", "in",
]
@router.get("/overview")
async def get_tld_overview(
db: Database,
limit: int = Query(25, ge=1, le=100),
offset: int = Query(0, ge=0),
sort_by: str = Query("popularity", enum=["popularity", "price_asc", "price_desc", "name"]),
search: str = Query(None, description="Search TLDs by name"),
source: str = Query("auto", enum=["auto", "db", "static"]),
):
"""Get overview of TLDs with current pricing, pagination, and search.
Data source priority:
- For TLDs with rich static data (multiple registrars): Use static data for consistency
- For TLDs only in database: Use database data
- This ensures Overview and Compare/Detail pages show identical prices
Args:
limit: Number of results per page (default 25)
offset: Skip N results for pagination
search: Filter TLDs by name (e.g., "com", "io")
sort_by: Sort order - popularity (default), price_asc, price_desc, name
source: Data source - "auto" (best available), "db" (only DB), "static" (only static)
"""
tld_list = []
tld_seen = set()
data_source = "combined"
# FIRST: Add all static data TLDs (these have rich multi-registrar data)
# This ensures consistency with /compare endpoint which also uses static data first
if source in ["auto", "static"]:
for tld, data in TLD_DATA.items():
min_price = get_min_price(data)
min_renewal = get_min_renewal_price(data)
trend = data.get("trend", "stable")
price_trends = calculate_price_trends(tld, trend)
risk = calculate_risk_level(min_price, min_renewal, price_trends["1y"])
tld_list.append({
"tld": tld,
"type": data["type"],
"description": data["description"],
"avg_registration_price": get_avg_price(data),
"min_registration_price": min_price,
"max_registration_price": get_max_price(data),
"min_renewal_price": min_renewal,
"avg_renewal_price": get_avg_renewal_price(data),
"registrar_count": len(data["registrars"]),
"trend": trend,
"price_change_7d": round(price_trends["1y"] / 52, 2), # Weekly estimate
"price_change_1y": price_trends["1y"],
"price_change_3y": price_trends["3y"],
"risk_level": risk["level"],
"risk_reason": risk["reason"],
"popularity_rank": TOP_TLDS_BY_POPULARITY.index(tld) if tld in TOP_TLDS_BY_POPULARITY else 999,
})
tld_seen.add(tld)
# SECOND: Add TLDs from database that are NOT in static data
# This adds the 800+ TLDs scraped from Porkbun
if source in ["auto", "db"]:
db_count = await get_db_price_count(db)
if db_count > 0:
db_prices = await get_db_prices(db)
for tld, data in db_prices.items():
if tld not in tld_seen: # Only add if not already from static
prices = data["prices"]
min_price = min(prices)
avg_price = round(sum(prices) / len(prices), 2)
# Get renewal prices from registrar data
renewal_prices = [r["renew"] for r in data["registrars"].values() if r.get("renew")]
min_renewal = min(renewal_prices) if renewal_prices else avg_price
avg_renewal = round(sum(renewal_prices) / len(renewal_prices), 2) if renewal_prices else avg_price
# Calculate trends and risk
price_trends = calculate_price_trends(tld, "stable")
risk = calculate_risk_level(min_price, min_renewal, price_trends["1y"])
tld_list.append({
"tld": tld,
"type": guess_tld_type(tld),
"description": f".{tld} domain extension",
"avg_registration_price": avg_price,
"min_registration_price": min_price,
"max_registration_price": max(prices),
"min_renewal_price": min_renewal,
"avg_renewal_price": avg_renewal,
"registrar_count": len(data["registrars"]),
"trend": "stable",
"price_change_7d": round(price_trends["1y"] / 52, 2),
"price_change_1y": price_trends["1y"],
"price_change_3y": price_trends["3y"],
"risk_level": risk["level"],
"risk_reason": risk["reason"],
"popularity_rank": TOP_TLDS_BY_POPULARITY.index(tld) if tld in TOP_TLDS_BY_POPULARITY else 999,
})
tld_seen.add(tld)
# Determine source label
if source == "static":
data_source = "static"
elif source == "db":
data_source = "database"
else:
data_source = "combined"
# Apply search filter
if search:
search_lower = search.lower().lstrip(".")
tld_list = [t for t in tld_list if search_lower in t["tld"].lower()]
# Store total before pagination
total = len(tld_list)
# Sort
if sort_by == "popularity":
tld_list.sort(key=lambda x: (x["popularity_rank"], x["tld"]))
elif sort_by == "price_asc":
tld_list.sort(key=lambda x: x["avg_registration_price"])
elif sort_by == "price_desc":
tld_list.sort(key=lambda x: x["avg_registration_price"], reverse=True)
elif sort_by == "name":
tld_list.sort(key=lambda x: x["tld"])
# Apply pagination
paginated = tld_list[offset:offset + limit]
return {
"tlds": paginated,
"total": total,
"limit": limit,
"offset": offset,
"has_more": offset + limit < total,
"source": data_source,
}
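
# Illustrative request/response (the mount prefix is configured where this
# router is included, not in this file; "/tld-prices" is an assumption):
#   GET /tld-prices/overview?search=io&limit=1&source=static
#   {
#       "tlds": [{"tld": "io", "type": "ccTLD", "min_registration_price": 30.0,
#                 "trend": "up", "risk_level": "low", ...}],
#       "total": 1, "limit": 1, "offset": 0, "has_more": false, "source": "static"
#   }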

def guess_tld_type(tld: str) -> str:
    """Guess TLD type based on pattern."""
    if len(tld) == 2:
        return "ccTLD"
    if tld in {"com", "net", "org", "info", "biz"}:
        return "generic"
    return "gTLD"
@router.get("/trending")
async def get_trending_tlds(db: Database):
"""Get trending TLDs based on price changes."""
trending = []
for tld, data in TLD_DATA.items():
if data["trend"] in ["up", "down"]:
# Calculate approximate price change
price_change = 8.5 if data["trend"] == "up" else -5.2
if tld == "ai":
price_change = 35.0 # AI domains have seen massive increase
elif tld == "io":
price_change = 8.0
elif tld == "xyz":
price_change = -12.0
elif tld == "info":
price_change = -8.0
trending.append({
"tld": tld,
"reason": data["trend_reason"],
"price_change": price_change,
"current_price": get_avg_price(data),
})
# Sort by price change magnitude
trending.sort(key=lambda x: abs(x["price_change"]), reverse=True)
return {"trending": trending[:6]}

async def get_real_price_history(db, tld: str, days: int) -> list[dict]:
    """
    Fetch real historical price data from the database.

    Returns daily average prices for the TLD, grouped by date.
    Works with both SQLite (dev) and PostgreSQL (prod).
    """
    cutoff = datetime.utcnow() - timedelta(days=days)
    # SQLite-compatible: select the raw datetime and group by date string in Python
    result = await db.execute(
        select(
            TLDPrice.recorded_at,
            TLDPrice.registration_price,
        )
        .where(TLDPrice.tld == tld)
        .where(TLDPrice.recorded_at >= cutoff)
        .order_by(TLDPrice.recorded_at)
    )
    rows = result.all()
    if not rows:
        return []

    # Group by date in Python (SQLite-safe approach)
    daily_prices: dict[str, list[float]] = {}
    for row in rows:
        # Handle both datetime objects and strings
        if hasattr(row.recorded_at, 'strftime'):
            date_str = row.recorded_at.strftime("%Y-%m-%d")
        else:
            date_str = str(row.recorded_at)[:10]  # first 10 chars: YYYY-MM-DD
        if date_str not in daily_prices:
            daily_prices[date_str] = []
        daily_prices[date_str].append(row.registration_price)

    # Daily averages
    return [
        {
            "date": date_str,
            "price": round(sum(prices) / len(prices), 2),
        }
        for date_str, prices in sorted(daily_prices.items())
    ]
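
# Shape of the returned history (dates/prices illustrative):
#   [{"date": "2025-11-01", "price": 9.73}, {"date": "2025-11-02", "price": 9.81}, ...]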
@router.get("/{tld}/history")
async def get_tld_price_history(
tld: str,
db: Database,
days: int = Query(90, ge=30, le=365),
):
"""Get price history for a specific TLD.
Returns REAL historical data from database if available (5+ data points),
otherwise generates simulated data based on current price and known trends.
Data Source Priority:
1. Real DB data (from daily scrapes) - marked as source: "database"
2. Simulated data based on trend - marked as source: "simulated"
"""
import math
tld_clean = tld.lower().lstrip(".")
# Get current price from database
db_prices = await get_db_prices(db, tld_clean)
current_price = 0
if db_prices and tld_clean in db_prices:
prices = db_prices[tld_clean]["prices"]
current_price = round(sum(prices) / len(prices), 2) if prices else 0
# Get static data for metadata and trend info
static_data = TLD_DATA.get(tld_clean, {})
# Determine trend and current price
if not current_price and static_data:
current_price = get_avg_price(static_data)
if not current_price:
raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")
# Get trend info
trend = static_data.get("trend", "stable")
trend_reason = static_data.get("trend_reason", "Price tracking available")
# ==========================================================================
# TRY REAL HISTORICAL DATA FROM DATABASE FIRST
# ==========================================================================
real_history = await get_real_price_history(db, tld_clean, days)
# Use real data if we have enough points (at least 5 data points)
if len(real_history) >= 5:
history = real_history
data_source = "database"
# Calculate price changes from real data
price_7d_ago = None
price_30d_ago = None
price_90d_ago = None
now = datetime.utcnow().date()
for h in history:
try:
h_date = datetime.strptime(h["date"], "%Y-%m-%d").date()
days_ago = (now - h_date).days
if days_ago <= 7 and price_7d_ago is None:
price_7d_ago = h["price"]
if days_ago <= 30 and price_30d_ago is None:
price_30d_ago = h["price"]
if days_ago <= 90 and price_90d_ago is None:
price_90d_ago = h["price"]
except (ValueError, TypeError):
continue
# Fallback to earliest available
if price_7d_ago is None and history:
price_7d_ago = history[-1]["price"]
if price_30d_ago is None and history:
price_30d_ago = history[0]["price"]
if price_90d_ago is None and history:
price_90d_ago = history[0]["price"]
else:
# ==========================================================================
# FALLBACK: SIMULATED DATA BASED ON TREND
# ==========================================================================
data_source = "simulated"
history = []
current_date = datetime.utcnow()
# Calculate trend factor based on known trends
trend_factor = 1.0
if trend == "up":
trend_factor = 0.92 # Prices were ~8% lower
elif trend == "down":
trend_factor = 1.05 # Prices were ~5% higher
# Generate weekly data points
for i in range(days, -1, -7):
date = current_date - timedelta(days=i)
progress = 1 - (i / days)
if trend == "up":
price = current_price * (trend_factor + (1 - trend_factor) * progress)
elif trend == "down":
price = current_price * (trend_factor - (trend_factor - 1) * progress)
else:
# Add small fluctuation for stable prices
fluctuation = math.sin(i * 0.1) * 0.02
price = current_price * (1 + fluctuation)
history.append({
"date": date.strftime("%Y-%m-%d"),
"price": round(price, 2),
})
# Calculate price changes from simulated data
price_7d_ago = history[-2]["price"] if len(history) >= 2 else current_price
price_30d_ago = history[-5]["price"] if len(history) >= 5 else current_price
price_90d_ago = history[0]["price"] if history else current_price
# Calculate percentage changes safely
change_7d = round((current_price - price_7d_ago) / price_7d_ago * 100, 2) if price_7d_ago and price_7d_ago > 0 else 0
change_30d = round((current_price - price_30d_ago) / price_30d_ago * 100, 2) if price_30d_ago and price_30d_ago > 0 else 0
change_90d = round((current_price - price_90d_ago) / price_90d_ago * 100, 2) if price_90d_ago and price_90d_ago > 0 else 0
return {
"tld": tld_clean,
"type": static_data.get("type", guess_tld_type(tld_clean)),
"description": static_data.get("description", f".{tld_clean} domain extension"),
"registry": static_data.get("registry", "Unknown"),
"current_price": current_price,
"price_change_7d": change_7d,
"price_change_30d": change_30d,
"price_change_90d": change_90d,
"trend": trend,
"trend_reason": trend_reason,
"history": history,
"source": data_source,
"data_points": len(history),
}
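
# Illustrative call (same assumed mount prefix as above). With an empty DB the
# endpoint falls back to simulation seeded from TLD_DATA["ai"]:
#   GET /tld-prices/ai/history?days=90
#   -> {"tld": "ai", "current_price": 71.63, "trend": "up", "source": "simulated",
#       "data_points": 13, "history": [{"date": "...", "price": ...}, ...], ...}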

def calculate_trend(history: list) -> str:
    """Calculate trend from price history."""
    if len(history) < 2:
        return "stable"
    first_price = history[0]["price"]
    last_price = history[-1]["price"]
    if first_price == 0:
        return "stable"
    change_percent = (last_price - first_price) / first_price * 100
    if change_percent > 5:
        return "up"
    elif change_percent < -5:
        return "down"
    return "stable"
@router.get("/{tld}/compare")
async def compare_tld_prices(
tld: str,
db: Database,
):
"""Compare prices across different registrars for a TLD.
COMBINES static data AND database data for complete registrar coverage.
This ensures all scraped registrars (Porkbun, GoDaddy, Namecheap, etc.) appear.
"""
tld_clean = tld.lower().lstrip(".")
# Collect registrars from ALL sources
registrars_map: dict[str, dict] = {}
metadata = {
"type": "generic",
"description": f".{tld_clean} domain extension",
"registry": "Unknown",
"introduced": None,
}
# 1. Add static data (curated, high-quality)
if tld_clean in TLD_DATA:
data = TLD_DATA[tld_clean]
metadata = {
"type": data["type"],
"description": data["description"],
"registry": data.get("registry", "Unknown"),
"introduced": data.get("introduced"),
}
for name, prices in data["registrars"].items():
registrars_map[name.lower()] = {
"name": name,
"registration_price": prices["register"],
"renewal_price": prices["renew"],
"transfer_price": prices["transfer"],
"source": "static",
}
# 2. Add/update with database data (scraped from multiple registrars)
db_prices = await get_db_prices(db, tld_clean)
if db_prices and tld_clean in db_prices:
for registrar_name, prices in db_prices[tld_clean]["registrars"].items():
key = registrar_name.lower()
# Add if not exists, or update with fresher DB data
if key not in registrars_map:
registrars_map[key] = {
"name": registrar_name.title(),
"registration_price": prices["register"],
"renewal_price": prices["renew"],
"transfer_price": prices.get("transfer"),
"source": "database",
}
if not registrars_map:
raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")
# Convert to list and sort by price
registrars = list(registrars_map.values())
registrars.sort(key=lambda x: x["registration_price"])
# Calculate price range from all registrars
all_prices = [r["registration_price"] for r in registrars]
return {
"tld": tld_clean,
"type": metadata["type"],
"description": metadata["description"],
"registry": metadata["registry"],
"introduced": metadata["introduced"],
"registrars": registrars,
"cheapest_registrar": registrars[0]["name"],
"cheapest_price": registrars[0]["registration_price"],
"price_range": {
"min": min(all_prices),
"max": max(all_prices),
"avg": round(sum(all_prices) / len(all_prices), 2),
},
"registrar_count": len(registrars),
}
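
# Illustrative response (assuming no scraped rows for .com in the DB, so only
# the five static registrars contribute, sorted by price):
#   GET /tld-prices/com/compare
#   -> {"tld": "com", "cheapest_registrar": "Namecheap", "cheapest_price": 9.58,
#       "price_range": {"min": 9.58, "max": 12.0, "avg": 10.75}, "registrar_count": 5, ...}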
@router.get("/{tld}")
async def get_tld_details(
tld: str,
db: Database,
):
"""Get complete details for a specific TLD."""
tld_clean = tld.lower().lstrip(".")
# Try static data first
if tld_clean in TLD_DATA:
data = TLD_DATA[tld_clean]
registrars = []
for name, prices in data["registrars"].items():
registrars.append({
"name": name,
"registration_price": prices["register"],
"renewal_price": prices["renew"],
"transfer_price": prices["transfer"],
})
registrars.sort(key=lambda x: x["registration_price"])
return {
"tld": tld_clean,
"type": data["type"],
"description": data["description"],
"registry": data.get("registry", "Unknown"),
"introduced": data.get("introduced"),
"trend": data["trend"],
"trend_reason": data["trend_reason"],
"pricing": {
"avg": get_avg_price(data),
"min": get_min_price(data),
"max": get_max_price(data),
},
"registrars": registrars,
"cheapest_registrar": registrars[0]["name"],
}
# Fall back to database
db_prices = await get_db_prices(db, tld_clean)
if not db_prices:
raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")
tld_data = db_prices[tld_clean]
registrars = [
{
"name": name,
"registration_price": prices["register"],
"renewal_price": prices["renew"],
"transfer_price": prices["transfer"],
}
for name, prices in tld_data["registrars"].items()
]
registrars.sort(key=lambda x: x["registration_price"])
prices = tld_data["prices"]
return {
"tld": tld_clean,
"type": guess_tld_type(tld_clean),
"description": f".{tld_clean} domain extension",
"registry": "Unknown",
"introduced": None,
"trend": "stable",
"trend_reason": "Price tracking started recently",
"pricing": {
"avg": round(sum(prices) / len(prices), 2) if prices else 0,
"min": min(prices) if prices else 0,
"max": max(prices) if prices else 0,
},
"registrars": registrars,
"cheapest_registrar": registrars[0]["name"] if registrars else "N/A",
}
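
# Resolution order (illustrative): static entry first, then DB fallback, then 404.
#   GET /tld-prices/com  -> served from TLD_DATA (trend, registry, etc.)
#   GET /tld-prices/wtf  -> served from scraped DB prices if present, else
#                           404 {"detail": "TLD '.wtf' not found"}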

# =============================================================================
# DIAGNOSTIC ENDPOINTS - Data Quality & Historical Stats
# =============================================================================
@router.get("/stats/data-quality")
async def get_data_quality_stats(db: Database):
    """
    Get statistics about historical data quality.

    Useful for monitoring:
    - how many TLDs have real historical data
    - date range of collected data
    - scraping frequency and gaps
    """
    # Total TLDs tracked
    tld_count = await db.execute(select(func.count(func.distinct(TLDPrice.tld))))
    total_tlds = tld_count.scalar() or 0

    # Total price records
    record_count = await db.execute(select(func.count(TLDPrice.id)))
    total_records = record_count.scalar() or 0

    # Date range
    date_range = await db.execute(
        select(
            func.min(TLDPrice.recorded_at).label("first_record"),
            func.max(TLDPrice.recorded_at).label("last_record"),
        )
    )
    dates = date_range.one()

    # Unique scrape days (how many days we have data for).
    # SQLite-compatible: count distinct date strings in Python.
    all_dates = await db.execute(select(TLDPrice.recorded_at))
    date_rows = all_dates.all()
    unique_date_strs = set()
    for row in date_rows:
        if hasattr(row.recorded_at, 'strftime'):
            unique_date_strs.add(row.recorded_at.strftime("%Y-%m-%d"))
        elif row.recorded_at:
            unique_date_strs.add(str(row.recorded_at)[:10])
    scrape_days = len(unique_date_strs)

    # TLDs with 5+ historical data points (enough for real charts)
    tlds_with_history = await db.execute(
        select(func.count())
        .select_from(
            select(TLDPrice.tld)
            .group_by(TLDPrice.tld)
            .having(func.count(TLDPrice.id) >= 5)
            .subquery()
        )
    )
    chartable_tlds = tlds_with_history.scalar() or 0

    # Registrars in database
    registrar_count = await db.execute(
        select(func.count(func.distinct(TLDPrice.registrar)))
    )
    total_registrars = registrar_count.scalar() or 0

    # Coverage
    days_of_data = 0
    if dates.first_record and dates.last_record:
        days_of_data = (dates.last_record - dates.first_record).days + 1
    coverage_percent = round((scrape_days / days_of_data * 100), 1) if days_of_data > 0 else 0

    return {
        "summary": {
            "total_tlds_tracked": total_tlds,
            "total_price_records": total_records,
            "tlds_with_real_history": chartable_tlds,
            "unique_registrars": total_registrars,
        },
        "time_range": {
            "first_record": dates.first_record.isoformat() if dates.first_record else None,
            "last_record": dates.last_record.isoformat() if dates.last_record else None,
            "days_of_data": days_of_data,
            "days_with_scrapes": scrape_days,
            "coverage_percent": coverage_percent,
        },
        "chart_readiness": {
            "tlds_ready_for_charts": chartable_tlds,
            "tlds_using_simulation": total_tlds - chartable_tlds,
            "recommendation": "Run daily scrapes for 7+ days to enable real charts" if chartable_tlds < 10 else "Good coverage!",
        },
        "data_sources": {
            "static_tlds": len(TLD_DATA),
            "database_tlds": total_tlds,
            "combined_coverage": len(TLD_DATA) + max(0, total_tlds - len(TLD_DATA)),
        },
    }
@router.get("/stats/scrape-history")
async def get_scrape_history(
db: Database,
days: int = Query(30, ge=1, le=365),
):
"""
Get scraping history - shows when scrapes ran and how many records were collected.
Useful for:
- Identifying gaps in data collection
- Verifying scheduler is working
- Troubleshooting data issues
"""
cutoff = datetime.utcnow() - timedelta(days=days)
# SQLite-compatible: fetch all and group in Python
result = await db.execute(
select(TLDPrice.recorded_at, TLDPrice.tld)
.where(TLDPrice.recorded_at >= cutoff)
)
rows = result.all()
# Group by date in Python
daily_data: dict[str, dict] = {}
for row in rows:
if hasattr(row.recorded_at, 'strftime'):
date_str = row.recorded_at.strftime("%Y-%m-%d")
elif row.recorded_at:
date_str = str(row.recorded_at)[:10]
else:
continue
if date_str not in daily_data:
daily_data[date_str] = {"records": 0, "tlds": set()}
daily_data[date_str]["records"] += 1
daily_data[date_str]["tlds"].add(row.tld)
# Convert to list and sort by date descending
scrape_history = [
{
"date": date_str,
"records_collected": data["records"],
"tlds_scraped": len(data["tlds"]),
}
for date_str, data in sorted(daily_data.items(), reverse=True)
]
total_records = sum(h["records_collected"] for h in scrape_history)
return {
"period_days": days,
"total_scrape_days": len(scrape_history),
"history": scrape_history,
"avg_records_per_day": round(total_records / len(scrape_history), 0) if scrape_history else 0,
}
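
# Illustrative response (dates and counts are made up):
#   GET /tld-prices/stats/scrape-history?days=7
#   -> {"period_days": 7, "total_scrape_days": 2,
#       "history": [{"date": "2025-12-11", "records_collected": 1842, "tlds_scraped": 920},
#                   {"date": "2025-12-10", "records_collected": 1797, "tlds_scraped": 912}],
#       "avg_records_per_day": 1820.0}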