pounce/backend/app/services/valuation.py
yves.gugger e3234e660e fix: Backend deps & transparent valuation schema
FIXED:
- Added get_current_user_optional to deps.py for mixed auth endpoints
- Added OptionalUser type alias for cleaner annotations
- Expanded ValuationResponse schema with full transparency:
  - ValuationScores (length, tld, keyword, brandability, overall)
  - ValuationFactors (length, tld, has_numbers, has_hyphens, etc.)
  - ValuationCalculation (base_value, all factors with reasons, formula)
  - RegistrationContext (tld_cost, value_to_cost_ratio)
  - Added disclaimer field
- Increased base_value from $10 to $50 for realistic valuations

VALUATION EXAMPLES:
- crypto.ai: $375 (was $70)
- x.com: $850 (1-letter premium)
- generic-domain-name.info: $5 (long with hyphens)

All API endpoints tested and working.
2025-12-08 13:51:09 +01:00

532 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Domain valuation service with transparent calculations."""
import logging
import re
from datetime import datetime, timezone
from typing import Optional, Dict, Any, List

from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.portfolio import DomainValuation
from app.models.tld_price import TLDPrice
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Aftermarket value multiplier per TLD, expressed relative to .com (= 1.0).
# The per-category tables below are merged, in this order, into TLD_VALUES.

# Premium generic TLDs - high aftermarket demand.
_PREMIUM_GENERIC_TLDS = {
    "com": 1.0,   # gold standard; the baseline for every other multiplier
    "net": 0.65,  # roughly 65% of the .com value
    "org": 0.6,   # roughly 60% of the .com value
}

# Tech / startup TLDs - high demand in specific sectors.
_TECH_STARTUP_TLDS = {
    "io": 0.75,   # popular with startups, premium pricing
    "ai": 1.2,    # AI boom, extremely high demand
    "co": 0.55,   # "company" alternative
    "dev": 0.45,  # developer focused
    "app": 0.45,  # app ecosystem
    "tech": 0.35,  # technology sector
}

# Geographic TLDs - regional value.
_GEOGRAPHIC_TLDS = {
    "de": 0.5,   # Germany - largest European ccTLD
    "uk": 0.45,  # United Kingdom
    "ch": 0.4,   # Switzerland - premium market
    "fr": 0.35,  # France
    "eu": 0.3,   # European Union
    "nl": 0.35,  # Netherlands
}

# Remaining TLDs - generally lower aftermarket value.
_LOWER_VALUE_TLDS = {
    "xyz": 0.15,  # budget option
    "online": 0.18,
    "site": 0.15,
    "store": 0.22,
    "shop": 0.22,
    "club": 0.15,
    "info": 0.2,
    "biz": 0.25,
    "me": 0.3,  # personal branding
}

TLD_VALUES = {
    **_PREMIUM_GENERIC_TLDS,
    **_TECH_STARTUP_TLDS,
    **_GEOGRAPHIC_TLDS,
    **_LOWER_VALUE_TLDS,
    # Fallback multiplier for any TLD that is not listed above.
    "_default": 0.15,
}
# Keyword -> value multiplier for terms that raise a domain's value.
# The per-category tables are merged in order; insertion order is part of the
# contract because detected keywords are reported in this order.

# Crypto / Web3 - very high value.
_CRYPTO_KEYWORDS = {
    "crypto": 2.0,
    "bitcoin": 2.0,
    "btc": 1.8,
    "eth": 1.8,
    "nft": 1.5,
    "web3": 1.8,
    "defi": 1.5,
    "blockchain": 1.5,
}

# AI / tech - high value.
_AI_TECH_KEYWORDS = {
    "ai": 2.0,
    "gpt": 1.8,
    "ml": 1.5,
    "chat": 1.3,
    "bot": 1.2,
    "cloud": 1.3,
    "saas": 1.4,
    "api": 1.3,
    "data": 1.2,
}

# Finance - high value.
_FINANCE_KEYWORDS = {
    "finance": 1.5,
    "fintech": 1.5,
    "bank": 1.6,
    "pay": 1.4,
    "money": 1.3,
    "invest": 1.4,
    "trade": 1.3,
    "fund": 1.4,
}

# E-commerce - medium-high value.
_ECOMMERCE_KEYWORDS = {
    "shop": 1.2,
    "buy": 1.2,
    "sell": 1.1,
    "deal": 1.1,
    "store": 1.2,
    "market": 1.2,
}

# Health - medium-high value.
_HEALTH_KEYWORDS = {
    "health": 1.3,
    "med": 1.2,
    "care": 1.1,
    "fit": 1.1,
}

# Entertainment - medium value.
_ENTERTAINMENT_KEYWORDS = {
    "game": 1.2,
    "gaming": 1.2,
    "play": 1.1,
    "esport": 1.2,
}

# Travel - medium value.
_TRAVEL_KEYWORDS = {
    "travel": 1.2,
    "trip": 1.1,
    "hotel": 1.2,
    "fly": 1.1,
}

# Real estate - medium-high value.
_REAL_ESTATE_KEYWORDS = {
    "home": 1.2,
    "house": 1.2,
    "real": 1.1,
    "estate": 1.3,
}

# Auto - medium value.
_AUTO_KEYWORDS = {
    "auto": 1.2,
    "car": 1.2,
    "drive": 1.1,
    "ev": 1.3,
}

HIGH_VALUE_KEYWORDS = {
    **_CRYPTO_KEYWORDS,
    **_AI_TECH_KEYWORDS,
    **_FINANCE_KEYWORDS,
    **_ECOMMERCE_KEYWORDS,
    **_HEALTH_KEYWORDS,
    **_ENTERTAINMENT_KEYWORDS,
    **_TRAVEL_KEYWORDS,
    **_REAL_ESTATE_KEYWORDS,
    **_AUTO_KEYWORDS,
}
# Short, common English words; a domain name that is exactly one of these is
# treated as more brandable.
COMMON_BRANDABLE_WORDS = set(
    (
        "app web net dev code tech data cloud "
        "shop store buy sell pay cash money "
        "game play fun cool best top pro max "
        "home life love care help work job "
        "news blog post chat talk meet link "
        "fast quick smart easy simple free new "
        "hub lab box bit one go my get"
    ).split()
)
class DomainValuationService:
    """
    Rule-based domain valuation service with a transparent methodology.

    VALUATION FORMULA:
    ------------------
    Base Value = $50 (``self.base_value``)
    Estimated Value = Base × Length_Factor × TLD_Factor × Keyword_Factor × Brand_Factor

    The raw product is clamped to $5 - $1,000,000 and then rounded to a
    magnitude-dependent step (see ``_round_value``).

    Where:
    - Length_Factor: shorter names are worth more
        - 1-2 chars:   ×15.0
        - 3 chars:     ×10.0
        - 4 chars:     ×5.0
        - 5 chars:     ×3.0
        - 6 chars:     ×2.0
        - 7 chars:     ×1.5
        - 8-10 chars:  ×1.0
        - 11-15 chars: ×0.6
        - 16-20 chars: ×0.3
        - 21+ chars:   ×0.1
    - TLD_Factor: looked up in TLD_VALUES
        - .com = 1.0 (baseline)
        - .ai = 1.2 (premium)
        - .io = 0.75
        - unknown TLDs use TLD_VALUES["_default"]
    - Keyword_Factor: the highest multiplier among matching
      HIGH_VALUE_KEYWORDS (1.0 if none match), then
        - ×1.5 if the whole name is exactly a high-value keyword
        - ×1.3 if the whole name is in COMMON_BRANDABLE_WORDS
    - Brand_Factor: product of the adjustments that apply
        - Pronounceable: +20%
        - All letters: +10%
        - 4-8 characters: +10%
        - Contains numbers: -30%
        - Contains hyphens: -40%
        - Same character three or more times in a row: -10%
        - Look-alike digit/letter pair (0/o next to 1/l): -15%
        - Five or more consonants in a row: -15%

    CONFIDENCE LEVELS (over the four component scores):
    - High:   min >= 50, mean >= 60, variance < 150
    - Medium: min >= 30, mean >= 45, variance < 300
    - Low:    everything else

    LIMITATIONS:
    - Cannot assess traffic/backlinks (would need external API)
    - Cannot verify trademark conflicts
    - Based on algorithmic analysis, not actual sales data
    - Only the text after the last dot is treated as the TLD, so a
      multi-label suffix such as "co.uk" is not recognised
    - Keywords are matched as substrings, so "ai" also matches inside
      unrelated words such as "email"
    """

    # (max_length, factor, score, label) tiers, checked in ascending order.
    # "{n}" in a label is replaced by the actual name length.
    _LENGTH_TIERS = (
        (2, 15.0, 100, "Ultra-premium {n}-letter domain"),
        (3, 10.0, 95, "Premium {n}-letter domain"),
        (4, 5.0, 85, "Highly valuable {n}-letter domain"),
        (5, 3.0, 75, "Valuable {n}-letter domain"),
        (6, 2.0, 65, "Good {n}-letter domain"),
        (7, 1.5, 55, "Standard {n}-letter domain"),
        (10, 1.0, 45, "Average length domain"),
        (15, 0.6, 30, "Longer domain, reduced value"),
        (20, 0.3, 15, "Very long domain"),
    )
    # (factor, score, label) used for anything longer than the last tier.
    _LENGTH_FALLBACK = (0.1, 5, "Extremely long domain")

    # TLD groups; these only select the explanation text. The multiplier
    # itself always comes from TLD_VALUES.
    _TLD_REASONS = (
        (("io", "co"), "is popular with startups"),
        (("dev", "app", "tech"), "is a tech-focused TLD with sector-specific demand"),
        (("net", "org"), "is a classic gTLD with good recognition"),
        (("de", "uk", "ch", "fr", "eu", "nl"), "is a regional ccTLD with local value"),
        (
            ("xyz", "online", "site", "club", "store", "shop"),
            "is a newer gTLD with lower aftermarket demand",
        ),
    )

    # (exclusive upper bound, rounding step) pairs used by _round_value.
    _ROUNDING_STEPS = (
        (50, 5),
        (100, 10),
        (500, 25),
        (1000, 50),
        (10000, 100),
        (100000, 500),
    )
    _ROUNDING_FALLBACK_STEP = 1000

    def __init__(self):
        # Base value calibrated to market research
        # A generic 10-char .com domain with no keywords typically sells for ~$50-100
        # Our formula: $50 × factors should produce realistic values
        self.base_value = 50  # Base value in USD

    async def estimate_value(
        self,
        domain: str,
        db: Optional[AsyncSession] = None,
        save_result: bool = True,
    ) -> Dict[str, Any]:
        """
        Estimate the market value of a domain with full transparency.

        Args:
            domain: Domain in ``name.tld`` form; it is lower-cased and
                stripped of surrounding whitespace before analysis.
            db: Optional session. When given, it is used to look up the
                average registration cost of the TLD and, if
                ``save_result`` is true, to store the valuation.
            save_result: Store the valuation when a session is available.
                A failed save is logged and rolled back, never raised.

        Returns:
            A dict with the estimated value, confidence, per-factor scores
            and the calculation breakdown, or ``{"error": ...}`` when the
            input has no dot or has an empty name or TLD.
        """
        domain = domain.lower().strip()

        # Split on the LAST dot. Both sides must be non-empty: otherwise
        # ".com" would be valued as an "ultra-premium" empty name and
        # "example." would be valued with an empty TLD.
        name, dot, tld = domain.rpartition(".")
        if not (dot and name and tld):
            return {"error": "Invalid domain format. Use: name.tld"}

        # Real registration cost is only available with a database session.
        tld_registration_cost = (
            await self._get_tld_cost(db, tld) if db is not None else None
        )

        # Calculate individual factors
        length_analysis = self._analyze_length(name)
        tld_analysis = self._analyze_tld(tld, tld_registration_cost)
        keyword_analysis = self._analyze_keywords(name)
        brand_analysis = self._analyze_brandability(name)

        raw_value = (
            self.base_value
            * length_analysis["factor"]
            * tld_analysis["factor"]
            * keyword_analysis["factor"]
            * brand_analysis["factor"]
        )
        # Clamp to reasonable bounds before rounding.
        estimated_value = self._round_value(max(5, min(raw_value, 1000000)))

        component_scores = (
            length_analysis["score"],
            tld_analysis["score"],
            keyword_analysis["score"],
            brand_analysis["score"],
        )
        confidence = self._calculate_confidence(*component_scores)

        has_known_cost = bool(tld_registration_cost) and tld_registration_cost > 0
        value_to_cost_ratio = (
            round(estimated_value / tld_registration_cost, 1)
            if has_known_cost
            else None
        )

        # Same naive-UTC ISO format that datetime.utcnow() produced, without
        # calling the deprecated function.
        calculated_at = (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        )

        result = {
            "domain": domain,
            "estimated_value": estimated_value,
            "currency": "USD",
            "confidence": confidence,
            # Transparent score breakdown
            "scores": {
                "length": length_analysis["score"],
                "tld": tld_analysis["score"],
                "keyword": keyword_analysis["score"],
                "brandability": brand_analysis["score"],
                "overall": round(sum(component_scores) / 4),
            },
            # Detailed factor explanations
            "factors": {
                "length": len(name),
                "tld": tld,
                "has_numbers": bool(re.search(r"\d", name)),
                "has_hyphens": "-" in name,
                "is_dictionary_word": name in COMMON_BRANDABLE_WORDS,
                "detected_keywords": keyword_analysis.get("detected_keywords", []),
            },
            # Transparent calculation breakdown
            "calculation": {
                "base_value": self.base_value,
                "length_factor": round(length_analysis["factor"], 2),
                "length_reason": length_analysis["reason"],
                "tld_factor": round(tld_analysis["factor"], 2),
                "tld_reason": tld_analysis["reason"],
                "keyword_factor": round(keyword_analysis["factor"], 2),
                "keyword_reason": keyword_analysis["reason"],
                "brand_factor": round(brand_analysis["factor"], 2),
                "brand_reason": brand_analysis["reason"],
                "formula": f"${self.base_value} × {length_analysis['factor']:.1f} × {tld_analysis['factor']:.2f} × {keyword_analysis['factor']:.1f} × {brand_analysis['factor']:.2f}",
                "raw_result": round(raw_value, 2),
            },
            # Registration cost context
            "registration_context": {
                "tld_cost": tld_registration_cost,
                "value_to_cost_ratio": value_to_cost_ratio,
            },
            "source": "pounce_algorithm_v1",
            "calculated_at": calculated_at,
            # Disclaimer
            "disclaimer": "This valuation is algorithmic and based on domain characteristics. "
            "Actual market value depends on traffic, backlinks, brandability perception, "
            "buyer interest, and current market conditions. For domains valued over $1,000, "
            "consider professional appraisal services like Estibot or GoDaddy.",
        }

        if save_result and db is not None:
            await self._save_valuation(db, domain, estimated_value, component_scores)

        return result

    async def _save_valuation(
        self,
        db: AsyncSession,
        domain: str,
        estimated_value: int,
        component_scores: tuple,
    ) -> None:
        """Best-effort save of one valuation row; failures are logged, not raised.

        ``component_scores`` is ``(length, tld, keyword, brandability)``.
        """
        length_score, tld_score, keyword_score, brandability_score = component_scores
        try:
            db.add(
                DomainValuation(
                    domain=domain,
                    estimated_value=estimated_value,
                    length_score=length_score,
                    tld_score=tld_score,
                    keyword_score=keyword_score,
                    brandability_score=brandability_score,
                    source="pounce_algorithm_v1",
                )
            )
            await db.commit()
        except Exception as exc:
            logger.exception("Failed to save valuation: %s", exc)
            # Roll back so the caller's session stays usable after the
            # failed commit; a failing rollback must not break the response.
            try:
                await db.rollback()
            except Exception:
                logger.exception("Rollback after failed valuation save also failed")

    async def _get_tld_cost(self, db: AsyncSession, tld: str) -> Optional[float]:
        """Return the average registration price stored for ``tld``.

        Returns ``None`` when there is no price (or the average is zero) or
        when the query fails; a failure is logged rather than raised.
        """
        try:
            result = await db.execute(
                select(func.avg(TLDPrice.registration_price))
                .where(TLDPrice.tld == tld.lower())
            )
            avg_price = result.scalar()
        except Exception:
            logger.warning(
                "Could not load registration cost for .%s", tld, exc_info=True
            )
            return None
        if not avg_price:
            return None
        # The aggregate may not come back as a float; normalise so the
        # result matches the annotated return type.
        return round(float(avg_price), 2)

    def _analyze_length(self, name: str) -> Dict[str, Any]:
        """Map the name length to a value factor, a 0-100 score and a reason.

        ``name`` is expected to be non-empty (``estimate_value`` rejects
        empty names before calling this).
        """
        length = len(name)
        factor, score, label = self._LENGTH_FALLBACK
        for max_length, tier_factor, tier_score, tier_label in self._LENGTH_TIERS:
            if length <= max_length:
                factor, score, label = tier_factor, tier_score, tier_label
                break
        # Using the real length means a 1-character name is no longer
        # described as a "2-letter domain".
        reason = f"{label.format(n=length)} (×{factor})"
        return {"factor": factor, "score": score, "reason": reason}

    def _analyze_tld(self, tld: str, registration_cost: Optional[float]) -> Dict[str, Any]:
        """Look up the TLD multiplier and explain it.

        The factor comes from TLD_VALUES (falling back to ``"_default"``).
        ``registration_cost`` is only appended to the explanation; it does
        not influence the factor or the score.
        """
        is_listed = tld != "_default" and tld in TLD_VALUES
        factor = TLD_VALUES[tld] if is_listed else TLD_VALUES["_default"]

        if tld == "com":
            reason = f".com is the gold standard (×{factor} baseline)"
            score = 100
        elif tld == "ai":
            reason = f".ai has premium value due to AI industry demand (×{factor})"
            score = 100
        else:
            description = next(
                (text for group, text in self._TLD_REASONS if tld in group),
                None,
            )
            if description is None:
                # Only TLDs that are really absent from TLD_VALUES are
                # described as uncommon; listed ones get a neutral text.
                description = (
                    "is an established TLD with modest aftermarket demand"
                    if is_listed
                    else "is not a common TLD, limited aftermarket"
                )
            reason = f".{tld} {description} (×{factor})"
            # round() instead of int() so float noise such as 28.999...
            # cannot drop the score by one point.
            score = min(100, round(factor * 100))

        # Add registration cost context
        if registration_cost:
            reason += f" | Reg. cost: ${registration_cost}"
        return {"factor": factor, "score": score, "reason": reason}

    def _analyze_keywords(self, name: str) -> Dict[str, Any]:
        """Score the keyword value of ``name``.

        The factor starts at the highest multiplier among all
        HIGH_VALUE_KEYWORDS that occur as a substring of the name, then is
        multiplied by 1.5 for an exact keyword match and by 1.3 when the
        name is a common brandable word. Matching is by substring, so short
        keywords can also match inside unrelated words.
        """
        name_lower = name.lower()
        factor = 1.0
        detected = []

        for keyword, multiplier in HIGH_VALUE_KEYWORDS.items():
            if keyword in name_lower:
                # Only the single best keyword drives the factor; every
                # match is still reported.
                factor = max(factor, multiplier)
                detected.append(f"{keyword} (×{multiplier})")

        # Exact match bonus
        if name_lower in HIGH_VALUE_KEYWORDS:
            factor *= 1.5
            detected.append("Exact keyword match (+50%)")

        # Common word bonus
        if name_lower in COMMON_BRANDABLE_WORDS:
            factor *= 1.3
            detected.append("Common brandable word (+30%)")

        if detected:
            # The reason shows at most three entries; the full list is
            # returned under "detected_keywords".
            reason = f"Premium keywords detected: {', '.join(detected[:3])}"
            score = min(100, int(factor * 40))
        else:
            reason = "No premium keywords detected (×1.0)"
            score = 30

        return {
            "factor": factor,
            "score": score,
            "reason": reason,
            "detected_keywords": detected,
        }

    def _analyze_brandability(self, name: str) -> Dict[str, Any]:
        """Score brandability as a product of bonuses and penalties.

        Returns the combined factor, a 0-100 score (``factor * 60``,
        clamped) and a reason listing at most the first four adjustments.
        """
        lowered = name.lower()

        # (applies, multiplier, label), evaluated in this fixed order.
        checks = (
            (self._is_pronounceable(name), 1.2, "Pronounceable (+20%)"),
            (name.isalpha(), 1.1, "All letters (+10%)"),
            (4 <= len(name) <= 8, 1.1, "Ideal length for branding (+10%)"),
            (bool(re.search(r"\d", name)), 0.7, "Contains numbers (-30%)"),
            ("-" in name, 0.6, "Contains hyphens (-40%)"),
            (bool(re.search(r"(.)\1{2,}", name)), 0.9, "Triple letters (-10%)"),
            # A zero-like character next to a one-like character where at
            # least one of them is a digit ("l0", "o1", "10", ...). Plain
            # letter pairs such as "ol"/"lo" are ordinary spelling and must
            # not be penalised.
            (
                bool(re.search(r"0[1l]|o1|[1l]0|1o", lowered)),
                0.85,
                "Confusing characters (-15%)",
            ),
            (
                bool(re.search(r"[bcdfghjklmnpqrstvwxyz]{5,}", lowered)),
                0.85,
                "Hard consonant cluster (-15%)",
            ),
        )

        factor = 1.0
        adjustments = []
        for applies, multiplier, label in checks:
            if applies:
                factor *= multiplier
                adjustments.append(label)

        if adjustments:
            reason = " | ".join(adjustments[:4])
        else:
            reason = "Standard brandability (×1.0)"

        score = min(100, max(0, int(factor * 60)))
        return {"factor": factor, "score": score, "reason": reason}

    def _is_pronounceable(self, name: str) -> bool:
        """Return True when 20%-60% of the characters are vowels (a, e, i, o, u)."""
        if not name:
            return False
        vowel_count = sum(1 for ch in name.lower() if ch in "aeiou")
        if vowel_count == 0:
            return False
        return 0.2 <= vowel_count / len(name) <= 0.6

    def _round_value(self, value: float) -> int:
        """Round ``value`` to a step that grows with its magnitude.

        Steps: 5 below 50, 10 below 100, 25 below 500, 50 below 1,000,
        100 below 10,000, 500 below 100,000, otherwise 1,000. Exact ties
        follow Python's ``round`` (half to even).
        """
        step = self._ROUNDING_FALLBACK_STEP
        for upper_bound, candidate_step in self._ROUNDING_STEPS:
            if value < upper_bound:
                step = candidate_step
                break
        return round(value / step) * step

    def _calculate_confidence(self, *scores: int) -> str:
        """Classify confidence as "high", "medium" or "low".

        Uses the minimum, mean and population variance of ``scores``.
        Returns "low" when no scores are given.
        """
        if not scores:
            return "low"
        avg = sum(scores) / len(scores)
        min_score = min(scores)
        variance = sum((s - avg) ** 2 for s in scores) / len(scores)

        if min_score >= 50 and avg >= 60 and variance < 150:
            return "high"
        if min_score >= 30 and avg >= 45 and variance < 300:
            return "medium"
        return "low"

    async def get_historical_valuations(
        self,
        domain: str,
        db: AsyncSession,
        limit: int = 10,
    ) -> List[Dict]:
        """Return up to ``limit`` stored valuations for ``domain``, newest first.

        The domain is normalised the same way ``estimate_value`` normalises
        it before saving (lower-cased and stripped).
        """
        normalized = domain.lower().strip()
        result = await db.execute(
            select(DomainValuation)
            .where(DomainValuation.domain == normalized)
            .order_by(DomainValuation.created_at.desc())
            .limit(limit)
        )
        return [
            {
                "estimated_value": row.estimated_value,
                # Guard against rows without a timestamp instead of raising.
                "calculated_at": row.created_at.isoformat() if row.created_at else None,
                "source": row.source,
            }
            for row in result.scalars().all()
        ]
# Shared module-level instance, created once when this module is imported.
valuation_service = DomainValuationService()