"""Domain valuation service with transparent calculations."""
import logging
import re
from datetime import datetime, timezone
from typing import Optional, Dict, Any, List

from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.portfolio import DomainValuation
from app.models.tld_price import TLDPrice

logger = logging.getLogger(__name__)

# TLD base value multipliers (market-researched)
# These reflect the relative premium/discount of TLDs in the aftermarket
TLD_VALUES = {
    # Premium Generic TLDs - High aftermarket demand
    "com": 1.0,   # Gold standard, baseline
    "net": 0.65,  # ~65% of .com value
    "org": 0.60,  # ~60% of .com value
    # Tech/Startup TLDs - High demand in specific sectors
    "io": 0.75,   # Popular with startups, premium pricing
    "ai": 1.20,   # AI boom, extremely high demand
    "co": 0.55,   # Company alternative
    "dev": 0.45,  # Developer focused
    "app": 0.45,  # App ecosystem
    "tech": 0.35,  # Technology sector
    # Geographic TLDs - Regional value
    "de": 0.50,  # Germany - largest European ccTLD
    "uk": 0.45,  # United Kingdom
    "ch": 0.40,  # Switzerland - premium market
    "fr": 0.35,  # France
    "eu": 0.30,  # European Union
    "nl": 0.35,  # Netherlands
    # New gTLDs - Generally lower aftermarket value
    "xyz": 0.15,  # Budget option
    "online": 0.18,
    "site": 0.15,
    "store": 0.22,
    "shop": 0.22,
    "club": 0.15,
    "info": 0.20,
    "biz": 0.25,
    "me": 0.30,  # Personal branding
    # Default for unknown TLDs
    "_default": 0.15,
}

# High-value keywords that increase domain value
HIGH_VALUE_KEYWORDS = {
    # Crypto/Web3 - Very high value
    "crypto": 2.0,
    "bitcoin": 2.0,
    "btc": 1.8,
    "eth": 1.8,
    "nft": 1.5,
    "web3": 1.8,
    "defi": 1.5,
    "blockchain": 1.5,
    # AI/Tech - High value
    "ai": 2.0,
    "gpt": 1.8,
    "ml": 1.5,
    "chat": 1.3,
    "bot": 1.2,
    "cloud": 1.3,
    "saas": 1.4,
    "api": 1.3,
    "data": 1.2,
    # Finance - High value
    "finance": 1.5,
    "fintech": 1.5,
    "bank": 1.6,
    "pay": 1.4,
    "money": 1.3,
    "invest": 1.4,
    "trade": 1.3,
    "fund": 1.4,
    # E-commerce - Medium-high value
    "shop": 1.2,
    "buy": 1.2,
    "sell": 1.1,
    "deal": 1.1,
    "store": 1.2,
    "market": 1.2,
    # Health - Medium-high value
    "health": 1.3,
    "med": 1.2,
    "care": 1.1,
    "fit": 1.1,
    # Entertainment - Medium value
    "game": 1.2,
    "gaming": 1.2,
    "play": 1.1,
    "esport": 1.2,
    # Travel - Medium value
    "travel": 1.2,
    "trip": 1.1,
    "hotel": 1.2,
    "fly": 1.1,
    # Real Estate - Medium-high value
    "home": 1.2,
    "house": 1.2,
    "real": 1.1,
    "estate": 1.3,
    # Auto - Medium value
    "auto": 1.2,
    "car": 1.2,
    "drive": 1.1,
    "ev": 1.3,
}

# Common English words that make domains more brandable
COMMON_BRANDABLE_WORDS = {
    "app", "web", "net", "dev", "code", "tech", "data", "cloud",
    "shop", "store", "buy", "sell", "pay", "cash", "money",
    "game", "play", "fun", "cool", "best", "top", "pro", "max",
    "home", "life", "love", "care", "help", "work", "job",
    "news", "blog", "post", "chat", "talk", "meet", "link",
    "fast", "quick", "smart", "easy", "simple", "free", "new",
    "hub", "lab", "box", "bit", "one", "go", "my", "get",
}

# Length tiers: (inclusive max length, factor, score, reason label).
# The first tier whose max length is >= len(name) applies; names longer than
# the last tier fall through to the "extremely long" values below.
_LENGTH_TIERS = (
    (2, 15.0, 100, "Ultra-premium {length}-letter domain"),
    (3, 10.0, 95, "Premium 3-letter domain"),
    (4, 5.0, 85, "Highly valuable 4-letter domain"),
    (5, 3.0, 75, "Valuable 5-letter domain"),
    (6, 2.0, 65, "Good 6-letter domain"),
    (7, 1.5, 55, "Standard 7-letter domain"),
    (10, 1.0, 45, "Average length domain"),
    (15, 0.6, 30, "Longer domain, reduced value"),
    (20, 0.3, 15, "Very long domain"),
)
_LENGTH_FALLBACK = (0.1, 5, "Extremely long domain")

# Rounding steps: values below `limit` are rounded to the nearest `step`.
_ROUNDING_STEPS = (
    (50, 5),
    (100, 10),
    (500, 25),
    (1000, 50),
    (10000, 100),
    (100000, 500),
)
_ROUNDING_FALLBACK_STEP = 1000

# Patterns used by the brandability analysis (compiled once at import time).
_DIGIT_RE = re.compile(r"\d")
_TRIPLE_CHAR_RE = re.compile(r"(.)\1{2,}")
# Adjacent look-alike characters (0/o and 1/l/I). At least one character of
# the pair must be a digit: plain letter pairs such as "lo" or "ol" are not
# visually ambiguous and must not be penalised.
_CONFUSABLE_RE = re.compile(r"0[1lI]|[oO]1|1[0oO]|[lI]0")
_CONSONANT_CLUSTER_RE = re.compile(r"[bcdfghjklmnpqrstvwxyz]{5,}")


class DomainValuationService:
    """
    Professional domain valuation service with transparent methodology.

    VALUATION FORMULA:
    ------------------
    Base Value = $50
    Estimated Value = Base × Length_Factor × TLD_Factor × Keyword_Factor × Brand_Factor
    (clamped to the range $5 - $1,000,000, then rounded by magnitude)

    Where:
    - Length_Factor: Shorter domains are exponentially more valuable
        - 1-2 chars: ×15.0
        - 3 chars: ×10.0
        - 4 chars: ×5.0
        - 5 chars: ×3.0
        - 6 chars: ×2.0
        - 7 chars: ×1.5
        - 8-10 chars: ×1.0
        - 11-15 chars: ×0.6
        - 16-20 chars: ×0.3
        - 21+ chars: ×0.1

    - TLD_Factor: Based on aftermarket research
        - .com = 1.0 (baseline)
        - .ai = 1.2 (premium)
        - .io = 0.75
        - Others: See TLD_VALUES (unknown TLDs use the "_default" entry)

    - Keyword_Factor: Premium keywords add value
        - Highest multiplier among HIGH_VALUE_KEYWORDS found as a
          substring of the name (up to 2.0×)
        - Name is exactly a premium keyword: additional ×1.5
        - Name is exactly a common brandable word: additional ×1.3
        - No premium keywords = 1.0×

    - Brand_Factor: Brandability adjustments
        - Pronounceable: +20%
        - All letters: +10%
        - 4-8 characters: +10%
        - Contains numbers: -30%
        - Contains hyphens: -40%
        - Same character three or more times in a row: -10%
        - Adjacent look-alike digit/letter pair (0/o, 1/l): -15%
        - Five or more consecutive consonants: -15%

    CONFIDENCE LEVELS:
    - High: lowest score >= 50, average >= 60, variance < 150
    - Medium: lowest score >= 30, average >= 45, variance < 300
    - Low: everything else

    LIMITATIONS:
    - Cannot assess traffic/backlinks (would need external API)
    - Cannot verify trademark conflicts
    - Based on algorithmic analysis, not actual sales data
    - The TLD is taken as the text after the last dot, so multi-label
      suffixes (e.g. "co.uk") are treated as name "x.co" + TLD "uk"
    """

    def __init__(self):
        # Base value calibrated to market research
        # A generic 10-char .com domain with no keywords typically sells for ~$50-100
        # Our formula: $50 × factors should produce realistic values
        self.base_value = 50  # Base value in USD

    async def estimate_value(
        self,
        domain: str,
        db: Optional[AsyncSession] = None,
        save_result: bool = True,
    ) -> Dict[str, Any]:
        """
        Estimate the market value of a domain with full transparency.

        Args:
            domain: Domain in ``name.tld`` form; it is lower-cased and
                stripped before analysis.
            db: Optional session used to look up the average registration
                price of the TLD and to persist the result.
            save_result: When true and ``db`` is given, a DomainValuation
                row is added and committed. Persistence is best-effort:
                a failure is logged and rolled back, and the valuation is
                still returned.

        Returns:
            A detailed breakdown of how the value was calculated, or
            ``{"error": ...}`` when the input has no dot or has an empty
            name or TLD part.
        """
        domain = domain.lower().strip()

        # Parse domain: split on the last dot and require both sides to be
        # non-empty so inputs like ".com" or "example." are rejected instead
        # of being valued as ultra-short names.
        name, separator, tld = domain.rpartition(".")
        if not separator or not name or not tld:
            return {"error": "Invalid domain format. Use: name.tld"}

        # Get real TLD registration cost if available
        tld_registration_cost = (
            await self._get_tld_cost(db, tld) if db is not None else None
        )

        # Calculate individual factors
        length_analysis = self._analyze_length(name)
        tld_analysis = self._analyze_tld(tld, tld_registration_cost)
        keyword_analysis = self._analyze_keywords(name)
        brand_analysis = self._analyze_brandability(name)

        # Calculate final value
        raw_value = (
            self.base_value
            * length_analysis["factor"]
            * tld_analysis["factor"]
            * keyword_analysis["factor"]
            * brand_analysis["factor"]
        )

        # Apply reasonable bounds
        estimated_value = self._round_value(max(5, min(raw_value, 1000000)))

        # Determine confidence
        confidence = self._calculate_confidence(
            length_analysis["score"],
            tld_analysis["score"],
            keyword_analysis["score"],
            brand_analysis["score"],
        )

        result = {
            "domain": domain,
            "estimated_value": estimated_value,
            "currency": "USD",
            "confidence": confidence,
            # Transparent score breakdown
            "scores": {
                "length": length_analysis["score"],
                "tld": tld_analysis["score"],
                "keyword": keyword_analysis["score"],
                "brandability": brand_analysis["score"],
                "overall": round((
                    length_analysis["score"]
                    + tld_analysis["score"]
                    + keyword_analysis["score"]
                    + brand_analysis["score"]
                ) / 4),
            },
            # Detailed factor explanations
            "factors": {
                "length": len(name),
                "tld": tld,
                "has_numbers": bool(_DIGIT_RE.search(name)),
                "has_hyphens": "-" in name,
                "is_dictionary_word": name.lower() in COMMON_BRANDABLE_WORDS,
                "detected_keywords": keyword_analysis.get("detected_keywords", []),
            },
            # Transparent calculation breakdown
            "calculation": {
                "base_value": self.base_value,
                "length_factor": round(length_analysis["factor"], 2),
                "length_reason": length_analysis["reason"],
                "tld_factor": round(tld_analysis["factor"], 2),
                "tld_reason": tld_analysis["reason"],
                "keyword_factor": round(keyword_analysis["factor"], 2),
                "keyword_reason": keyword_analysis["reason"],
                "brand_factor": round(brand_analysis["factor"], 2),
                "brand_reason": brand_analysis["reason"],
                "formula": f"${self.base_value} × {length_analysis['factor']:.1f} × {tld_analysis['factor']:.2f} × {keyword_analysis['factor']:.1f} × {brand_analysis['factor']:.2f}",
                "raw_result": round(raw_value, 2),
            },
            # Registration cost context
            "registration_context": {
                "tld_cost": tld_registration_cost,
                "value_to_cost_ratio": round(estimated_value / tld_registration_cost, 1)
                if tld_registration_cost and tld_registration_cost > 0
                else None,
            },
            "source": "pounce_algorithm_v1",
            # Naive UTC timestamp: same string format datetime.utcnow()
            # produced, without relying on the deprecated call.
            "calculated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            # Disclaimer
            "disclaimer": "This valuation is algorithmic and based on domain characteristics. "
            "Actual market value depends on traffic, backlinks, brandability perception, "
            "buyer interest, and current market conditions. For domains valued over $1,000, "
            "consider professional appraisal services like Estibot or GoDaddy."
        }

        # Save to database if requested
        if save_result and db is not None:
            try:
                valuation = DomainValuation(
                    domain=domain,
                    estimated_value=estimated_value,
                    length_score=length_analysis["score"],
                    tld_score=tld_analysis["score"],
                    keyword_score=keyword_analysis["score"],
                    brandability_score=brand_analysis["score"],
                    source="pounce_algorithm_v1",
                )
                db.add(valuation)
                await db.commit()
            except Exception as e:
                # Best-effort persistence: never fail the valuation itself.
                logger.exception("Failed to save valuation: %s", e)
                # A failed flush/commit leaves the session unusable until it
                # is rolled back, so reset it for whoever uses it next.
                try:
                    await db.rollback()
                except Exception as rollback_error:
                    logger.exception(
                        "Failed to roll back after valuation save error: %s",
                        rollback_error,
                    )

        return result

    async def _get_tld_cost(self, db: AsyncSession, tld: str) -> Optional[float]:
        """
        Get average registration cost for a TLD from database.

        Returns the average of ``TLDPrice.registration_price`` for the
        lower-cased TLD rounded to 2 decimals, or None when the average is
        missing/zero or the lookup fails (the failure is logged).
        """
        try:
            result = await db.execute(
                select(func.avg(TLDPrice.registration_price))
                .where(TLDPrice.tld == tld.lower())
            )
            avg_price = result.scalar()
            # The driver may hand back a Decimal; normalise to float so the
            # value matches the annotation and serialises cleanly.
            return round(float(avg_price), 2) if avg_price else None
        except Exception:
            logger.warning(
                "Failed to look up registration cost for TLD %r", tld, exc_info=True
            )
            return None

    def _analyze_length(self, name: str) -> Dict[str, Any]:
        """
        Analyze domain length and return factor with explanation.

        Returns a dict with "factor" (float multiplier), "score" (int) and
        "reason" (human-readable text including the multiplier).
        """
        length = len(name)

        # Length-based multipliers (exponential for short domains)
        factor, score, label = _LENGTH_FALLBACK
        for max_length, tier_factor, tier_score, tier_label in _LENGTH_TIERS:
            if length <= max_length:
                factor, score, label = tier_factor, tier_score, tier_label
                break

        # Only the shortest tier embeds the length, so that one-character
        # names are not described as "2-letter".
        reason = f"{label.format(length=length)} (×{factor})"
        return {"factor": factor, "score": score, "reason": reason}

    def _analyze_tld(self, tld: str, registration_cost: Optional[float]) -> Dict[str, Any]:
        """
        Analyze TLD value with market context.

        The factor comes from TLD_VALUES (falling back to its "_default"
        entry). The score is 100 for .com and .ai and the factor expressed
        as a percentage otherwise. When a registration cost is supplied it
        is appended to the reason text.
        """
        base_factor = TLD_VALUES.get(tld, TLD_VALUES["_default"])
        # round() rather than int(): avoids truncating e.g. 28.999999 to 28
        # if a factor that is not exactly representable is ever added.
        score = round(base_factor * 100)

        # Adjust explanation based on TLD type
        if tld == "com":
            reason = ".com is the gold standard (×1.0 baseline)"
            score = 100
        elif tld == "ai":
            reason = ".ai has premium value due to AI industry demand (×1.2)"
            score = 100
        elif tld in ("io", "co"):
            reason = f".{tld} is popular with startups (×{base_factor})"
        elif tld in ("net", "org"):
            reason = f".{tld} is a classic gTLD with good recognition (×{base_factor})"
        elif tld in ("de", "uk", "ch", "fr", "eu", "nl"):
            reason = f".{tld} is a regional ccTLD with local value (×{base_factor})"
        elif tld in ("xyz", "online", "site", "club"):
            reason = f".{tld} is a newer gTLD with lower aftermarket demand (×{base_factor})"
        else:
            reason = f".{tld} is not a common TLD, limited aftermarket (×{base_factor})"

        # Add registration cost context
        if registration_cost:
            reason += f" | Reg. cost: ${registration_cost}"

        return {"factor": base_factor, "score": score, "reason": reason}

    def _analyze_keywords(self, name: str) -> Dict[str, Any]:
        """
        Analyze keyword value in domain name.

        Keywords are matched as substrings of the lower-cased name; the
        highest matching multiplier becomes the factor. An exact keyword
        match multiplies it by 1.5 and an exact common-brandable-word match
        by 1.3. Returns "factor", "score", "reason" and the list of
        "detected_keywords" descriptions.
        """
        name_lower = name.lower()
        factor = 1.0
        detected = []

        # Check for high-value keywords
        for keyword, multiplier in HIGH_VALUE_KEYWORDS.items():
            if keyword in name_lower:
                if multiplier > factor:
                    factor = multiplier
                detected.append(f"{keyword} (×{multiplier})")

        # Exact match bonus
        if name_lower in HIGH_VALUE_KEYWORDS:
            factor *= 1.5
            detected.append("Exact keyword match (+50%)")

        # Common word bonus
        if name_lower in COMMON_BRANDABLE_WORDS:
            factor *= 1.3
            detected.append("Common brandable word (+30%)")

        # Build reason (only the first three findings are shown)
        if detected:
            reason = f"Premium keywords detected: {', '.join(detected[:3])}"
            score = min(100, int(factor * 40))
        else:
            reason = "No premium keywords detected (×1.0)"
            score = 30

        return {
            "factor": factor,
            "score": score,
            "reason": reason,
            "detected_keywords": detected,
        }

    def _analyze_brandability(self, name: str) -> Dict[str, Any]:
        """
        Analyze brandability and memorability.

        Applies multiplicative bonuses and penalties to a factor starting
        at 1.0 and returns "factor", "score" (factor × 60 clamped to 0-100)
        and "reason" (the first four adjustments joined by " | ").
        """
        factor = 1.0
        adjustments = []

        # Positive factors
        if self._is_pronounceable(name):
            factor *= 1.2
            adjustments.append("Pronounceable (+20%)")

        if name.isalpha():
            factor *= 1.1
            adjustments.append("All letters (+10%)")

        if 4 <= len(name) <= 8:
            factor *= 1.1
            adjustments.append("Ideal length for branding (+10%)")

        # Negative factors
        if _DIGIT_RE.search(name):
            factor *= 0.7
            adjustments.append("Contains numbers (-30%)")

        if "-" in name:
            factor *= 0.6
            adjustments.append("Contains hyphens (-40%)")

        if _TRIPLE_CHAR_RE.search(name):
            factor *= 0.9
            adjustments.append("Triple letters (-10%)")

        # Only digit/letter look-alike pairs count; ordinary letter pairs
        # such as "lo" in "love" are not confusing.
        if _CONFUSABLE_RE.search(name):
            factor *= 0.85
            adjustments.append("Confusing characters (-15%)")

        if _CONSONANT_CLUSTER_RE.search(name.lower()):
            factor *= 0.85
            adjustments.append("Hard consonant cluster (-15%)")

        # Build reason
        if adjustments:
            reason = " | ".join(adjustments[:4])
        else:
            reason = "Standard brandability (×1.0)"

        score = min(100, max(0, int(factor * 60)))

        return {"factor": factor, "score": score, "reason": reason}

    def _is_pronounceable(self, name: str) -> bool:
        """
        Check if name is likely pronounceable.

        A name qualifies when it contains at least one of "aeiou" and
        vowels make up between 20% and 60% of its characters (inclusive).
        An empty name is not pronounceable.
        """
        if not name:
            return False

        vowel_count = sum(1 for ch in name.lower() if ch in "aeiou")
        if vowel_count == 0:
            return False

        return 0.2 <= vowel_count / len(name) <= 0.6

    def _round_value(self, value: float) -> int:
        """
        Round value to reasonable precision based on magnitude.

        Steps: nearest 5 below 50, 10 below 100, 25 below 500, 50 below
        1,000, 100 below 10,000, 500 below 100,000, otherwise nearest 1,000.
        """
        for limit, step in _ROUNDING_STEPS:
            if value < limit:
                return round(value / step) * step
        return round(value / _ROUNDING_FALLBACK_STEP) * _ROUNDING_FALLBACK_STEP

    def _calculate_confidence(self, *scores: int) -> str:
        """
        Calculate confidence level based on score distribution.

        Returns "high", "medium" or "low" from the minimum, mean and
        population variance of the given scores. With no scores there is
        nothing to be confident about, so "low" is returned.
        """
        if not scores:
            return "low"

        avg = sum(scores) / len(scores)
        min_score = min(scores)
        variance = sum((s - avg) ** 2 for s in scores) / len(scores)

        if min_score >= 50 and avg >= 60 and variance < 150:
            return "high"
        if min_score >= 30 and avg >= 45 and variance < 300:
            return "medium"
        return "low"

    async def get_historical_valuations(
        self,
        domain: str,
        db: AsyncSession,
        limit: int = 10,
    ) -> List[Dict]:
        """
        Get historical valuations for tracking value changes.

        Returns up to ``limit`` stored valuations for the domain, newest
        first, each as a dict with "estimated_value", "calculated_at"
        (ISO string, or None when the row has no timestamp) and "source".
        """
        # Normalise exactly like estimate_value does before saving, so
        # lookups with stray whitespace still find the stored rows.
        normalized_domain = domain.lower().strip()

        result = await db.execute(
            select(DomainValuation)
            .where(DomainValuation.domain == normalized_domain)
            .order_by(DomainValuation.created_at.desc())
            .limit(limit)
        )
        valuations = result.scalars().all()

        return [
            {
                "estimated_value": v.estimated_value,
                "calculated_at": v.created_at.isoformat() if v.created_at else None,
                "source": v.source,
            }
            for v in valuations
        ]


# Singleton instance
valuation_service = DomainValuationService()