"""TLD Price API endpoints with real market data from database + static fallback.""" from datetime import datetime, timedelta from typing import Optional, List from fastapi import APIRouter, Query, HTTPException from pydantic import BaseModel from sqlalchemy import select, func, desc from app.api.deps import Database from app.models.tld_price import TLDPrice, TLDInfo router = APIRouter() async def get_db_prices(db, tld: str = None) -> dict: """Get latest prices from database.""" # Subquery to get latest record per TLD/registrar subq = ( select( TLDPrice.tld, TLDPrice.registrar, func.max(TLDPrice.recorded_at).label("max_date") ) .group_by(TLDPrice.tld, TLDPrice.registrar) .subquery() ) query = ( select(TLDPrice) .join( subq, (TLDPrice.tld == subq.c.tld) & (TLDPrice.registrar == subq.c.registrar) & (TLDPrice.recorded_at == subq.c.max_date) ) ) if tld: query = query.where(TLDPrice.tld == tld.lower().lstrip(".")) result = await db.execute(query) prices = result.scalars().all() # Group by TLD tld_prices = {} for p in prices: if p.tld not in tld_prices: tld_prices[p.tld] = { "registrars": {}, "prices": [] } tld_prices[p.tld]["registrars"][p.registrar] = { "register": p.registration_price, "renew": p.renewal_price or p.registration_price, "transfer": p.transfer_price or p.registration_price, } tld_prices[p.tld]["prices"].append(p.registration_price) return tld_prices async def get_db_price_count(db) -> int: """Get count of TLDs in database.""" result = await db.execute(select(func.count(func.distinct(TLDPrice.tld)))) return result.scalar() or 0 # Real TLD price data based on current market research (December 2024) # Prices in USD, sourced from major registrars: Namecheap, Cloudflare, Porkbun, Google Domains TLD_DATA = { # Generic TLDs "com": { "type": "generic", "description": "Commercial - Most popular TLD worldwide", "registry": "Verisign", "introduced": 1985, "registrars": { "Cloudflare": {"register": 10.44, "renew": 10.44, "transfer": 10.44}, "Namecheap": {"register": 9.58, "renew": 14.58, "transfer": 9.48}, "Porkbun": {"register": 9.73, "renew": 10.91, "transfer": 9.73}, "Google Domains": {"register": 12.00, "renew": 12.00, "transfer": 12.00}, "GoDaddy": {"register": 11.99, "renew": 22.99, "transfer": 11.99}, }, "trend": "stable", "trend_reason": "Stable registry pricing, slight increase in 2024", }, "net": { "type": "generic", "description": "Network - Popular for tech and infrastructure", "registry": "Verisign", "introduced": 1985, "registrars": { "Cloudflare": {"register": 11.94, "renew": 11.94, "transfer": 11.94}, "Namecheap": {"register": 12.88, "renew": 16.88, "transfer": 12.78}, "Porkbun": {"register": 11.52, "renew": 12.77, "transfer": 11.52}, "Google Domains": {"register": 15.00, "renew": 15.00, "transfer": 15.00}, }, "trend": "stable", "trend_reason": "Mature market, predictable pricing", }, "org": { "type": "generic", "description": "Organization - Non-profits and communities", "registry": "Public Interest Registry", "introduced": 1985, "registrars": { "Cloudflare": {"register": 10.11, "renew": 10.11, "transfer": 10.11}, "Namecheap": {"register": 10.98, "renew": 15.98, "transfer": 10.88}, "Porkbun": {"register": 10.19, "renew": 11.44, "transfer": 10.19}, "Google Domains": {"register": 12.00, "renew": 12.00, "transfer": 12.00}, }, "trend": "stable", "trend_reason": "Non-profit pricing commitment", }, "io": { "type": "ccTLD", "description": "British Indian Ocean Territory - Popular for tech startups", "registry": "Internet Computer Bureau", "introduced": 1997, "registrars": { 
"Cloudflare": {"register": 33.98, "renew": 33.98, "transfer": 33.98}, "Namecheap": {"register": 32.88, "renew": 38.88, "transfer": 32.78}, "Porkbun": {"register": 32.47, "renew": 36.47, "transfer": 32.47}, "Google Domains": {"register": 30.00, "renew": 30.00, "transfer": 30.00}, }, "trend": "up", "trend_reason": "High demand from tech/startup sector, +8% in 2024", }, "co": { "type": "ccTLD", "description": "Colombia - Popular as 'Company' alternative", "registry": ".CO Internet S.A.S", "introduced": 1991, "registrars": { "Cloudflare": {"register": 11.02, "renew": 11.02, "transfer": 11.02}, "Namecheap": {"register": 11.98, "renew": 29.98, "transfer": 11.88}, "Porkbun": {"register": 10.77, "renew": 27.03, "transfer": 10.77}, }, "trend": "stable", "trend_reason": "Steady adoption as .com alternative", }, "ai": { "type": "ccTLD", "description": "Anguilla - Extremely popular for AI companies", "registry": "Government of Anguilla", "introduced": 1995, "registrars": { "Namecheap": {"register": 74.98, "renew": 74.98, "transfer": 74.88}, "Porkbun": {"register": 59.93, "renew": 79.93, "transfer": 59.93}, "GoDaddy": {"register": 79.99, "renew": 99.99, "transfer": 79.99}, }, "trend": "up", "trend_reason": "AI boom driving massive demand, +35% since 2023", }, "dev": { "type": "generic", "description": "Developer - For software developers", "registry": "Google", "introduced": 2019, "registrars": { "Cloudflare": {"register": 11.94, "renew": 11.94, "transfer": 11.94}, "Namecheap": {"register": 14.98, "renew": 17.98, "transfer": 14.88}, "Porkbun": {"register": 13.33, "renew": 15.65, "transfer": 13.33}, "Google Domains": {"register": 14.00, "renew": 14.00, "transfer": 14.00}, }, "trend": "stable", "trend_reason": "Growing developer adoption", }, "app": { "type": "generic", "description": "Application - For apps and software", "registry": "Google", "introduced": 2018, "registrars": { "Cloudflare": {"register": 14.94, "renew": 14.94, "transfer": 14.94}, "Namecheap": {"register": 16.98, "renew": 19.98, "transfer": 16.88}, "Porkbun": {"register": 15.45, "renew": 17.77, "transfer": 15.45}, "Google Domains": {"register": 16.00, "renew": 16.00, "transfer": 16.00}, }, "trend": "stable", "trend_reason": "Steady growth in app ecosystem", }, "xyz": { "type": "generic", "description": "XYZ - Generation XYZ, affordable option", "registry": "XYZ.COM LLC", "introduced": 2014, "registrars": { "Cloudflare": {"register": 10.44, "renew": 10.44, "transfer": 10.44}, "Namecheap": {"register": 1.00, "renew": 13.98, "transfer": 1.00}, # Promo "Porkbun": {"register": 9.15, "renew": 10.40, "transfer": 9.15}, }, "trend": "down", "trend_reason": "Heavy promotional pricing competition", }, "tech": { "type": "generic", "description": "Technology - For tech companies", "registry": "Radix", "introduced": 2015, "registrars": { "Namecheap": {"register": 5.98, "renew": 49.98, "transfer": 5.88}, "Porkbun": {"register": 4.79, "renew": 44.52, "transfer": 4.79}, "GoDaddy": {"register": 4.99, "renew": 54.99, "transfer": 4.99}, }, "trend": "stable", "trend_reason": "Low intro pricing, high renewals", }, "online": { "type": "generic", "description": "Online - For online presence", "registry": "Radix", "introduced": 2015, "registrars": { "Namecheap": {"register": 2.98, "renew": 39.98, "transfer": 2.88}, "Porkbun": {"register": 2.59, "renew": 34.22, "transfer": 2.59}, }, "trend": "stable", "trend_reason": "Budget-friendly option", }, "store": { "type": "generic", "description": "Store - For e-commerce", "registry": "Radix", "introduced": 2016, 
"registrars": { "Namecheap": {"register": 3.88, "renew": 56.88, "transfer": 3.78}, "Porkbun": {"register": 3.28, "renew": 48.95, "transfer": 3.28}, }, "trend": "stable", "trend_reason": "E-commerce growth sector", }, "me": { "type": "ccTLD", "description": "Montenegro - Popular for personal branding", "registry": "doMEn", "introduced": 2007, "registrars": { "Cloudflare": {"register": 14.94, "renew": 14.94, "transfer": 14.94}, "Namecheap": {"register": 5.98, "renew": 19.98, "transfer": 5.88}, "Porkbun": {"register": 5.15, "renew": 17.45, "transfer": 5.15}, }, "trend": "stable", "trend_reason": "Personal branding market", }, "info": { "type": "generic", "description": "Information - For informational sites", "registry": "Afilias", "introduced": 2001, "registrars": { "Cloudflare": {"register": 11.44, "renew": 11.44, "transfer": 11.44}, "Namecheap": {"register": 4.98, "renew": 22.98, "transfer": 4.88}, "Porkbun": {"register": 4.24, "renew": 19.45, "transfer": 4.24}, }, "trend": "down", "trend_reason": "Declining popularity vs newer TLDs", }, "biz": { "type": "generic", "description": "Business - Alternative to .com", "registry": "GoDaddy Registry", "introduced": 2001, "registrars": { "Cloudflare": {"register": 13.44, "renew": 13.44, "transfer": 13.44}, "Namecheap": {"register": 14.98, "renew": 20.98, "transfer": 14.88}, "Porkbun": {"register": 13.96, "renew": 18.45, "transfer": 13.96}, }, "trend": "stable", "trend_reason": "Mature but declining market", }, "ch": { "type": "ccTLD", "description": "Switzerland - Swiss domains", "registry": "SWITCH", "introduced": 1987, "registrars": { "Infomaniak": {"register": 9.80, "renew": 9.80, "transfer": 9.80}, "Hostpoint": {"register": 11.90, "renew": 11.90, "transfer": 0.00}, "Namecheap": {"register": 12.98, "renew": 12.98, "transfer": 12.88}, }, "trend": "stable", "trend_reason": "Stable Swiss market", }, "de": { "type": "ccTLD", "description": "Germany - German domains", "registry": "DENIC", "introduced": 1986, "registrars": { "United Domains": {"register": 9.90, "renew": 9.90, "transfer": 9.90}, "IONOS": {"register": 0.99, "renew": 12.00, "transfer": 0.00}, "Namecheap": {"register": 9.98, "renew": 11.98, "transfer": 9.88}, }, "trend": "stable", "trend_reason": "Largest ccTLD in Europe", }, "uk": { "type": "ccTLD", "description": "United Kingdom - British domains", "registry": "Nominet", "introduced": 1985, "registrars": { "Namecheap": {"register": 8.88, "renew": 10.98, "transfer": 8.78}, "Porkbun": {"register": 8.45, "renew": 9.73, "transfer": 8.45}, "123-reg": {"register": 9.99, "renew": 11.99, "transfer": 9.99}, }, "trend": "stable", "trend_reason": "Strong local market", }, } def get_avg_price(tld_data: dict) -> float: """Calculate average registration price across registrars.""" prices = [r["register"] for r in tld_data["registrars"].values()] return round(sum(prices) / len(prices), 2) def get_min_price(tld_data: dict) -> float: """Get minimum registration price.""" return min(r["register"] for r in tld_data["registrars"].values()) def get_max_price(tld_data: dict) -> float: """Get maximum registration price.""" return max(r["register"] for r in tld_data["registrars"].values()) def get_min_renewal_price(tld_data: dict) -> float: """Get minimum renewal price.""" return min(r["renew"] for r in tld_data["registrars"].values()) def get_avg_renewal_price(tld_data: dict) -> float: """Calculate average renewal price across registrars.""" prices = [r["renew"] for r in tld_data["registrars"].values()] return round(sum(prices) / len(prices), 2) def 


def calculate_price_trends(tld: str, trend: str) -> dict:
    """
    Calculate price change trends based on TLD characteristics.

    In a real implementation, this would query historical price data.
    For now, we estimate based on known market trends.
    """
    # Known TLD price trend data (based on market research)
    KNOWN_TRENDS = {
        # Rising TLDs (AI boom, tech demand)
        "ai": {"1y": 15.0, "3y": 45.0},
        "io": {"1y": 5.0, "3y": 12.0},
        "app": {"1y": 3.0, "3y": 8.0},
        "dev": {"1y": 2.0, "3y": 5.0},
        # Stable/slight increase (registry price increases)
        "com": {"1y": 7.0, "3y": 14.0},
        "net": {"1y": 5.0, "3y": 10.0},
        "org": {"1y": 4.0, "3y": 8.0},
        # ccTLDs (mostly stable)
        "ch": {"1y": 0.0, "3y": 2.0},
        "de": {"1y": 0.0, "3y": 1.0},
        "uk": {"1y": 1.0, "3y": 3.0},
        "co": {"1y": 3.0, "3y": 7.0},
        "eu": {"1y": 0.0, "3y": 2.0},
        # Promo-driven (volatile)
        "xyz": {"1y": -10.0, "3y": -5.0},
        "online": {"1y": -5.0, "3y": 0.0},
        "store": {"1y": -8.0, "3y": -3.0},
        "tech": {"1y": 0.0, "3y": 5.0},
        "site": {"1y": -5.0, "3y": 0.0},
    }

    if tld in KNOWN_TRENDS:
        return KNOWN_TRENDS[tld]

    # Default based on the trend field
    if trend == "up":
        return {"1y": 8.0, "3y": 20.0}
    elif trend == "down":
        return {"1y": -5.0, "3y": -10.0}
    else:
        return {"1y": 2.0, "3y": 5.0}


def calculate_risk_level(min_price: float, min_renewal: float, trend_1y: float) -> dict:
    """
    Calculate the risk level for a TLD based on renewal ratio and volatility.

    Returns:
        dict with 'level' (low/medium/high) and 'reason'
    """
    renewal_ratio = min_renewal / min_price if min_price > 0 else 1

    # High risk: renewal trap (ratio > 3x)
    if renewal_ratio > 3:
        return {"level": "high", "reason": "Renewal Trap"}

    # Medium risk: moderate renewal markup (2-3x) or rising fast
    if renewal_ratio > 2:
        return {"level": "medium", "reason": "High Renewal"}
    if trend_1y > 20:
        return {"level": "medium", "reason": "Rising Fast"}

    # Low risk
    if trend_1y > 0:
        return {"level": "low", "reason": "Stable Rising"}
    return {"level": "low", "reason": "Stable"}
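

# Worked example for the risk heuristic, using the static .tech entry: the
# cheapest registration is 4.79 (Porkbun) against a 44.52 renewal, a ratio of
# roughly 9.3x, so calculate_risk_level(4.79, 44.52, 0.0) returns
# {"level": "high", "reason": "Renewal Trap"} - the classic intro-price trap.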


# Top TLDs by popularity (based on actual domain registration volumes)
TOP_TLDS_BY_POPULARITY = [
    "com", "net", "org", "de", "uk", "cn", "ru", "nl", "br", "au",
    "io", "co", "ai", "app", "dev", "xyz", "online", "site", "tech", "store",
    "info", "biz", "me", "tv", "cc", "eu", "fr", "it", "es", "pl",
    "ch", "at", "be", "se", "no", "dk", "fi", "ie", "nz", "in",
]


@router.get("/overview")
async def get_tld_overview(
    db: Database,
    limit: int = Query(25, ge=1, le=100),
    offset: int = Query(0, ge=0),
    sort_by: str = Query("popularity", enum=["popularity", "price_asc", "price_desc", "name"]),
    search: Optional[str] = Query(None, description="Search TLDs by name"),
    source: str = Query("auto", enum=["auto", "db", "static"]),
):
    """Get an overview of TLDs with current pricing, pagination, and search.

    Data source priority:
    - TLDs with rich static data (multiple registrars) use static data for consistency.
    - TLDs only in the database use database data.
    - This ensures the Overview and Compare/Detail pages show identical prices.

    Args:
        limit: Number of results per page (default 25)
        offset: Skip N results for pagination
        sort_by: Sort order - popularity (default), price_asc, price_desc, name
        search: Filter TLDs by name (e.g., "com", "io")
        source: Data source - "auto" (best available), "db" (only DB), "static" (only static)
    """
    tld_list = []
    tld_seen = set()
    data_source = "combined"

    # FIRST: add all static-data TLDs (these have rich multi-registrar data).
    # This keeps results consistent with the /compare endpoint, which also uses static data first.
    if source in ["auto", "static"]:
        for tld, data in TLD_DATA.items():
            min_price = get_min_price(data)
            min_renewal = get_min_renewal_price(data)
            trend = data.get("trend", "stable")
            price_trends = calculate_price_trends(tld, trend)
            risk = calculate_risk_level(min_price, min_renewal, price_trends["1y"])

            tld_list.append({
                "tld": tld,
                "type": data["type"],
                "description": data["description"],
                "avg_registration_price": get_avg_price(data),
                "min_registration_price": min_price,
                "max_registration_price": get_max_price(data),
                "min_renewal_price": min_renewal,
                "avg_renewal_price": get_avg_renewal_price(data),
                "registrar_count": len(data["registrars"]),
                "trend": trend,
                "price_change_7d": round(price_trends["1y"] / 52, 2),  # Weekly estimate
                "price_change_1y": price_trends["1y"],
                "price_change_3y": price_trends["3y"],
                "risk_level": risk["level"],
                "risk_reason": risk["reason"],
                "popularity_rank": TOP_TLDS_BY_POPULARITY.index(tld) if tld in TOP_TLDS_BY_POPULARITY else 999,
            })
            tld_seen.add(tld)

    # SECOND: add TLDs from the database that are NOT in static data.
    # This adds the 800+ TLDs scraped from Porkbun.
    if source in ["auto", "db"]:
        db_count = await get_db_price_count(db)
        if db_count > 0:
            db_prices = await get_db_prices(db)
            for tld, data in db_prices.items():
                if tld not in tld_seen:  # Only add if not already covered by static data
                    prices = data["prices"]
                    min_price = min(prices)
                    avg_price = round(sum(prices) / len(prices), 2)

                    # Get renewal prices from registrar data
                    renewal_prices = [r["renew"] for r in data["registrars"].values() if r.get("renew")]
                    min_renewal = min(renewal_prices) if renewal_prices else avg_price
                    avg_renewal = round(sum(renewal_prices) / len(renewal_prices), 2) if renewal_prices else avg_price

                    # Calculate trends and risk
                    price_trends = calculate_price_trends(tld, "stable")
                    risk = calculate_risk_level(min_price, min_renewal, price_trends["1y"])

                    tld_list.append({
                        "tld": tld,
                        "type": guess_tld_type(tld),
                        "description": f".{tld} domain extension",
                        "avg_registration_price": avg_price,
                        "min_registration_price": min_price,
                        "max_registration_price": max(prices),
                        "min_renewal_price": min_renewal,
                        "avg_renewal_price": avg_renewal,
                        "registrar_count": len(data["registrars"]),
                        "trend": "stable",
                        "price_change_7d": round(price_trends["1y"] / 52, 2),
                        "price_change_1y": price_trends["1y"],
                        "price_change_3y": price_trends["3y"],
                        "risk_level": risk["level"],
                        "risk_reason": risk["reason"],
                        "popularity_rank": TOP_TLDS_BY_POPULARITY.index(tld) if tld in TOP_TLDS_BY_POPULARITY else 999,
                    })
                    tld_seen.add(tld)

    # Determine the source label
    if source == "static":
        data_source = "static"
    elif source == "db":
        data_source = "database"
    else:
        data_source = "combined"

    # Apply search filter
    if search:
        search_lower = search.lower().lstrip(".")
        tld_list = [t for t in tld_list if search_lower in t["tld"].lower()]

    # Store total before pagination
    total = len(tld_list)

    # Sort
    if sort_by == "popularity":
        tld_list.sort(key=lambda x: (x["popularity_rank"], x["tld"]))
    elif sort_by == "price_asc":
        tld_list.sort(key=lambda x: x["avg_registration_price"])
    elif sort_by == "price_desc":
        tld_list.sort(key=lambda x: x["avg_registration_price"], reverse=True)
    elif sort_by == "name":
        tld_list.sort(key=lambda x: x["tld"])

    # Apply pagination
    paginated = tld_list[offset:offset + limit]

    return {
        "tlds": paginated,
        "total": total,
        "limit": limit,
        "offset": offset,
        "has_more": offset + limit < total,
        "source": data_source,
    }
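

# Illustrative request (assuming an empty scrape database, so only the 18 static
# TLDs are present; the response shape follows the return value above):
#
#   GET /overview?limit=2&sort_by=price_asc
#   -> {"tlds": [{"tld": "online", ...}, {"tld": "store", ...}],
#       "total": 18, "limit": 2, "offset": 0, "has_more": true, "source": "combined"}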


def guess_tld_type(tld: str) -> str:
    """Guess the TLD type based on pattern."""
    if len(tld) == 2:
        return "ccTLD"
    if tld in {"com", "net", "org", "info", "biz"}:
        return "generic"
    return "gTLD"


@router.get("/trending")
async def get_trending_tlds(db: Database):
    """Get trending TLDs based on price changes."""
    trending = []

    for tld, data in TLD_DATA.items():
        if data["trend"] in ["up", "down"]:
            # Approximate price change, with overrides for well-known movers
            price_change = 8.5 if data["trend"] == "up" else -5.2
            if tld == "ai":
                price_change = 35.0  # AI domains have seen a massive increase
            elif tld == "io":
                price_change = 8.0
            elif tld == "xyz":
                price_change = -12.0
            elif tld == "info":
                price_change = -8.0

            trending.append({
                "tld": tld,
                "reason": data["trend_reason"],
                "price_change": price_change,
                "current_price": get_avg_price(data),
            })

    # Sort by price change magnitude
    trending.sort(key=lambda x: abs(x["price_change"]), reverse=True)
    return {"trending": trending[:6]}


async def get_real_price_history(db, tld: str, days: int) -> list[dict]:
    """
    Fetch real historical price data from the database.

    Returns daily average prices for the TLD, grouped by date.
    Works with both SQLite (dev) and PostgreSQL (prod).
    """
    cutoff = datetime.utcnow() - timedelta(days=days)

    # SQLite-compatible: select the raw datetimes and group by date string in Python
    result = await db.execute(
        select(
            TLDPrice.recorded_at,
            TLDPrice.registration_price,
        )
        .where(TLDPrice.tld == tld)
        .where(TLDPrice.recorded_at >= cutoff)
        .order_by(TLDPrice.recorded_at)
    )
    rows = result.all()

    if not rows:
        return []

    # Group by date in Python (SQLite-safe approach)
    daily_prices: dict[str, list[float]] = {}
    for row in rows:
        # Handle both datetime objects and strings
        if hasattr(row.recorded_at, "strftime"):
            date_str = row.recorded_at.strftime("%Y-%m-%d")
        else:
            date_str = str(row.recorded_at)[:10]  # First 10 chars (YYYY-MM-DD)
        if date_str not in daily_prices:
            daily_prices[date_str] = []
        daily_prices[date_str].append(row.registration_price)

    # Calculate daily averages
    return [
        {
            "date": date_str,
            "price": round(sum(prices) / len(prices), 2),
        }
        for date_str, prices in sorted(daily_prices.items())
    ]
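

# Illustrative shape of get_real_price_history() output: multiple scrapes on the
# same day collapse into a single averaged point per date (prices here are made up):
#
#   [{"date": "2024-12-01", "price": 10.08},
#    {"date": "2024-12-02", "price": 10.12}]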


@router.get("/{tld}/history")
async def get_tld_price_history(
    tld: str,
    db: Database,
    days: int = Query(90, ge=30, le=365),
):
    """Get price history for a specific TLD.

    Returns REAL historical data from the database if available (5+ data points),
    otherwise generates simulated data based on the current price and known trends.

    Data source priority:
    1. Real DB data (from daily scrapes) - marked as source: "database"
    2. Simulated data based on trend - marked as source: "simulated"
    """
    tld_clean = tld.lower().lstrip(".")

    # Get the current price from the database
    db_prices = await get_db_prices(db, tld_clean)
    current_price = 0
    if db_prices and tld_clean in db_prices:
        prices = db_prices[tld_clean]["prices"]
        current_price = round(sum(prices) / len(prices), 2) if prices else 0

    # Get static data for metadata and trend info
    static_data = TLD_DATA.get(tld_clean, {})

    # Fall back to static data for the current price
    if not current_price and static_data:
        current_price = get_avg_price(static_data)

    if not current_price:
        raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")

    # Get trend info
    trend = static_data.get("trend", "stable")
    trend_reason = static_data.get("trend_reason", "Price tracking available")

    # ==========================================================================
    # TRY REAL HISTORICAL DATA FROM DATABASE FIRST
    # ==========================================================================
    real_history = await get_real_price_history(db, tld_clean, days)

    # Use real data if we have enough points (at least 5 data points)
    if len(real_history) >= 5:
        history = real_history
        data_source = "database"

        # Calculate price changes from real data
        price_7d_ago = None
        price_30d_ago = None
        price_90d_ago = None
        now = datetime.utcnow().date()

        for h in history:
            try:
                h_date = datetime.strptime(h["date"], "%Y-%m-%d").date()
                days_ago = (now - h_date).days
                if days_ago <= 7 and price_7d_ago is None:
                    price_7d_ago = h["price"]
                if days_ago <= 30 and price_30d_ago is None:
                    price_30d_ago = h["price"]
                if days_ago <= 90 and price_90d_ago is None:
                    price_90d_ago = h["price"]
            except (ValueError, TypeError):
                continue

        # Fall back to the nearest available data points
        if price_7d_ago is None and history:
            price_7d_ago = history[-1]["price"]
        if price_30d_ago is None and history:
            price_30d_ago = history[0]["price"]
        if price_90d_ago is None and history:
            price_90d_ago = history[0]["price"]
    else:
        # ======================================================================
        # FALLBACK: SIMULATED DATA BASED ON TREND
        # ======================================================================
        data_source = "simulated"
        history = []
        current_date = datetime.utcnow()

        # Calculate the trend factor based on known trends
        trend_factor = 1.0
        if trend == "up":
            trend_factor = 0.92  # Prices were ~8% lower
        elif trend == "down":
            trend_factor = 1.05  # Prices were ~5% higher

        # Generate weekly data points
        for i in range(days, -1, -7):
            date = current_date - timedelta(days=i)
            progress = 1 - (i / days)

            if trend == "up":
                price = current_price * (trend_factor + (1 - trend_factor) * progress)
            elif trend == "down":
                price = current_price * (trend_factor - (trend_factor - 1) * progress)
            else:
                # Add a small fluctuation for stable prices
                fluctuation = math.sin(i * 0.1) * 0.02
                price = current_price * (1 + fluctuation)

            history.append({
                "date": date.strftime("%Y-%m-%d"),
                "price": round(price, 2),
            })

        # Calculate price changes from simulated data
        price_7d_ago = history[-2]["price"] if len(history) >= 2 else current_price
        price_30d_ago = history[-5]["price"] if len(history) >= 5 else current_price
        price_90d_ago = history[0]["price"] if history else current_price

    # Calculate percentage changes safely
    change_7d = round((current_price - price_7d_ago) / price_7d_ago * 100, 2) if price_7d_ago and price_7d_ago > 0 else 0
    change_30d = round((current_price - price_30d_ago) / price_30d_ago * 100, 2) if price_30d_ago and price_30d_ago > 0 else 0
    change_90d = round((current_price - price_90d_ago) / price_90d_ago * 100, 2) if price_90d_ago and price_90d_ago > 0 else 0

    return {
        "tld": tld_clean,
        "type": static_data.get("type", guess_tld_type(tld_clean)),
        "description": static_data.get("description", f".{tld_clean} domain extension"),
        "registry": static_data.get("registry", "Unknown"),
        "current_price": current_price,
        "price_change_7d": change_7d,
        "price_change_30d": change_30d,
        "price_change_90d": change_90d,
        "trend": trend,
        "trend_reason": trend_reason,
        "history": history,
        "source": data_source,
        "data_points": len(history),
    }
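

# Illustrative request (assuming no scraped history for .ai yet, so the endpoint
# falls back to simulation; 71.63 is the average of the static .ai registration
# prices, and range(90, -1, -7) yields 13 weekly points):
#
#   GET /ai/history?days=90
#   -> {"tld": "ai", "current_price": 71.63, "source": "simulated",
#       "data_points": 13, "history": [{"date": "...", "price": ...}, ...], ...}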


def calculate_trend(history: list) -> str:
    """Calculate the trend from a price history."""
    if len(history) < 2:
        return "stable"

    first_price = history[0]["price"]
    last_price = history[-1]["price"]
    if first_price == 0:
        return "stable"

    change_percent = (last_price - first_price) / first_price * 100
    if change_percent > 5:
        return "up"
    elif change_percent < -5:
        return "down"
    return "stable"


@router.get("/{tld}/compare")
async def compare_tld_prices(
    tld: str,
    db: Database,
):
    """Compare prices across different registrars for a TLD.

    COMBINES static data AND database data for complete registrar coverage.
    This ensures all scraped registrars (Porkbun, GoDaddy, Namecheap, etc.) appear.
    """
    tld_clean = tld.lower().lstrip(".")

    # Collect registrars from ALL sources
    registrars_map: dict[str, dict] = {}
    metadata = {
        "type": "generic",
        "description": f".{tld_clean} domain extension",
        "registry": "Unknown",
        "introduced": None,
    }

    # 1. Add static data (curated, high-quality)
    if tld_clean in TLD_DATA:
        data = TLD_DATA[tld_clean]
        metadata = {
            "type": data["type"],
            "description": data["description"],
            "registry": data.get("registry", "Unknown"),
            "introduced": data.get("introduced"),
        }
        for name, prices in data["registrars"].items():
            registrars_map[name.lower()] = {
                "name": name,
                "registration_price": prices["register"],
                "renewal_price": prices["renew"],
                "transfer_price": prices["transfer"],
                "source": "static",
            }

    # 2. Add database data (scraped from multiple registrars)
    db_prices = await get_db_prices(db, tld_clean)
    if db_prices and tld_clean in db_prices:
        for registrar_name, prices in db_prices[tld_clean]["registrars"].items():
            key = registrar_name.lower()
            # Add only if not already present; curated static entries take precedence
            if key not in registrars_map:
                registrars_map[key] = {
                    "name": registrar_name.title(),
                    "registration_price": prices["register"],
                    "renewal_price": prices["renew"],
                    "transfer_price": prices.get("transfer"),
                    "source": "database",
                }

    if not registrars_map:
        raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")

    # Convert to a list and sort by price
    registrars = list(registrars_map.values())
    registrars.sort(key=lambda x: x["registration_price"])

    # Calculate the price range across all registrars
    all_prices = [r["registration_price"] for r in registrars]

    return {
        "tld": tld_clean,
        "type": metadata["type"],
        "description": metadata["description"],
        "registry": metadata["registry"],
        "introduced": metadata["introduced"],
        "registrars": registrars,
        "cheapest_registrar": registrars[0]["name"],
        "cheapest_price": registrars[0]["registration_price"],
        "price_range": {
            "min": min(all_prices),
            "max": max(all_prices),
            "avg": round(sum(all_prices) / len(all_prices), 2),
        },
        "registrar_count": len(registrars),
    }
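

# Illustrative request (assuming no additional registrars in the scrape database;
# all figures follow from the static .com entry):
#
#   GET /com/compare
#   -> {"cheapest_registrar": "Namecheap", "cheapest_price": 9.58,
#       "price_range": {"min": 9.58, "max": 12.00, "avg": 10.75},
#       "registrar_count": 5, ...}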


@router.get("/{tld}")
async def get_tld_details(
    tld: str,
    db: Database,
):
    """Get complete details for a specific TLD."""
    tld_clean = tld.lower().lstrip(".")

    # Try static data first
    if tld_clean in TLD_DATA:
        data = TLD_DATA[tld_clean]
        registrars = []
        for name, prices in data["registrars"].items():
            registrars.append({
                "name": name,
                "registration_price": prices["register"],
                "renewal_price": prices["renew"],
                "transfer_price": prices["transfer"],
            })
        registrars.sort(key=lambda x: x["registration_price"])

        return {
            "tld": tld_clean,
            "type": data["type"],
            "description": data["description"],
            "registry": data.get("registry", "Unknown"),
            "introduced": data.get("introduced"),
            "trend": data["trend"],
            "trend_reason": data["trend_reason"],
            "pricing": {
                "avg": get_avg_price(data),
                "min": get_min_price(data),
                "max": get_max_price(data),
            },
            "registrars": registrars,
            "cheapest_registrar": registrars[0]["name"],
        }

    # Fall back to the database
    db_prices = await get_db_prices(db, tld_clean)
    if not db_prices:
        raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found")

    tld_data = db_prices[tld_clean]
    registrars = [
        {
            "name": name,
            "registration_price": prices["register"],
            "renewal_price": prices["renew"],
            "transfer_price": prices["transfer"],
        }
        for name, prices in tld_data["registrars"].items()
    ]
    registrars.sort(key=lambda x: x["registration_price"])

    prices = tld_data["prices"]
    return {
        "tld": tld_clean,
        "type": guess_tld_type(tld_clean),
        "description": f".{tld_clean} domain extension",
        "registry": "Unknown",
        "introduced": None,
        "trend": "stable",
        "trend_reason": "Price tracking started recently",
        "pricing": {
            "avg": round(sum(prices) / len(prices), 2) if prices else 0,
            "min": min(prices) if prices else 0,
            "max": max(prices) if prices else 0,
        },
        "registrars": registrars,
        "cheapest_registrar": registrars[0]["name"] if registrars else "N/A",
    }
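

# Illustrative request (all figures follow from the static .de entry):
#
#   GET /de
#   -> {"cheapest_registrar": "IONOS",
#       "pricing": {"avg": 6.96, "min": 0.99, "max": 9.98}, ...}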


# =============================================================================
# DIAGNOSTIC ENDPOINTS - Data Quality & Historical Stats
# =============================================================================

@router.get("/stats/data-quality")
async def get_data_quality_stats(db: Database):
    """
    Get statistics about historical data quality.

    Useful for monitoring:
    - How many TLDs have real historical data
    - Date range of collected data
    - Scraping frequency and gaps
    """
    # Total TLDs tracked
    tld_count = await db.execute(select(func.count(func.distinct(TLDPrice.tld))))
    total_tlds = tld_count.scalar() or 0

    # Total price records
    record_count = await db.execute(select(func.count(TLDPrice.id)))
    total_records = record_count.scalar() or 0

    # Date range
    date_range = await db.execute(
        select(
            func.min(TLDPrice.recorded_at).label("first_record"),
            func.max(TLDPrice.recorded_at).label("last_record"),
        )
    )
    dates = date_range.one()

    # Unique scrape days (how many days have data).
    # SQLite-compatible: count distinct date strings in Python.
    all_dates = await db.execute(select(TLDPrice.recorded_at))
    date_rows = all_dates.all()
    unique_date_strs = set()
    for row in date_rows:
        if hasattr(row.recorded_at, "strftime"):
            unique_date_strs.add(row.recorded_at.strftime("%Y-%m-%d"))
        elif row.recorded_at:
            unique_date_strs.add(str(row.recorded_at)[:10])
    scrape_days = len(unique_date_strs)

    # TLDs with 5+ historical data points (enough for real charts)
    tlds_with_history = await db.execute(
        select(func.count())
        .select_from(
            select(TLDPrice.tld)
            .group_by(TLDPrice.tld)
            .having(func.count(TLDPrice.id) >= 5)
            .subquery()
        )
    )
    chartable_tlds = tlds_with_history.scalar() or 0

    # Registrars in the database
    registrar_count = await db.execute(
        select(func.count(func.distinct(TLDPrice.registrar)))
    )
    total_registrars = registrar_count.scalar() or 0

    # Calculate coverage
    days_of_data = 0
    if dates.first_record and dates.last_record:
        days_of_data = (dates.last_record - dates.first_record).days + 1
    coverage_percent = round((scrape_days / days_of_data * 100), 1) if days_of_data > 0 else 0

    return {
        "summary": {
            "total_tlds_tracked": total_tlds,
            "total_price_records": total_records,
            "tlds_with_real_history": chartable_tlds,
            "unique_registrars": total_registrars,
        },
        "time_range": {
            "first_record": dates.first_record.isoformat() if dates.first_record else None,
            "last_record": dates.last_record.isoformat() if dates.last_record else None,
            "days_of_data": days_of_data,
            "days_with_scrapes": scrape_days,
            "coverage_percent": coverage_percent,
        },
        "chart_readiness": {
            "tlds_ready_for_charts": chartable_tlds,
            "tlds_using_simulation": total_tlds - chartable_tlds,
            "recommendation": "Run daily scrapes for 7+ days to enable real charts" if chartable_tlds < 10 else "Good coverage!",
        },
        "data_sources": {
            "static_tlds": len(TLD_DATA),
            "database_tlds": total_tlds,
            "combined_coverage": len(TLD_DATA) + max(0, total_tlds - len(TLD_DATA)),
        },
    }
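

# Worked example for the coverage math above: if the first and last records span
# 30 days (days_of_data = 30) but scrapes ran on only 20 distinct dates
# (scrape_days = 20), then coverage_percent = round(20 / 30 * 100, 1) = 66.7.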


@router.get("/stats/scrape-history")
async def get_scrape_history(
    db: Database,
    days: int = Query(30, ge=1, le=365),
):
    """
    Get scraping history - shows when scrapes ran and how many records were collected.

    Useful for:
    - Identifying gaps in data collection
    - Verifying the scheduler is working
    - Troubleshooting data issues
    """
    cutoff = datetime.utcnow() - timedelta(days=days)

    # SQLite-compatible: fetch all rows and group in Python
    result = await db.execute(
        select(TLDPrice.recorded_at, TLDPrice.tld)
        .where(TLDPrice.recorded_at >= cutoff)
    )
    rows = result.all()

    # Group by date in Python
    daily_data: dict[str, dict] = {}
    for row in rows:
        if hasattr(row.recorded_at, "strftime"):
            date_str = row.recorded_at.strftime("%Y-%m-%d")
        elif row.recorded_at:
            date_str = str(row.recorded_at)[:10]
        else:
            continue

        if date_str not in daily_data:
            daily_data[date_str] = {"records": 0, "tlds": set()}
        daily_data[date_str]["records"] += 1
        daily_data[date_str]["tlds"].add(row.tld)

    # Convert to a list, sorted by date descending
    scrape_history = [
        {
            "date": date_str,
            "records_collected": data["records"],
            "tlds_scraped": len(data["tlds"]),
        }
        for date_str, data in sorted(daily_data.items(), reverse=True)
    ]

    total_records = sum(h["records_collected"] for h in scrape_history)

    return {
        "period_days": days,
        "total_scrape_days": len(scrape_history),
        "history": scrape_history,
        "avg_records_per_day": round(total_records / len(scrape_history), 0) if scrape_history else 0,
    }
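

# Illustrative request (record counts are made up; the shape follows the return
# value above):
#
#   GET /stats/scrape-history?days=7
#   -> {"period_days": 7, "total_scrape_days": 5,
#       "history": [{"date": "2024-12-07", "records_collected": 850, "tlds_scraped": 830}, ...],
#       "avg_records_per_day": 845.0}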