pounce/backend/app/services/intent_detector.py
yves.gugger 58228e3d33
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
feat: integrate Pounce self-promotion & viral growth system
Pounce Eigenwerbung (from pounce_endgame.md):
- Add 'pounce_promo' as fallback partner for generic/unclear intent domains
- Create dedicated Pounce promo landing page with CTA to register
- Update footer on all yield pages: 'Monetized by Pounce • Own a domain? Start yielding'

Tech/Investment Domain Detection:
- Add 'investment_domains' category (invest, crypto, trading, domain, startup)
- Add 'tech_dev' category (developer, web3, fintech, proptech)
- Both categories have 'pounce_affinity' flag for higher Pounce conversion

Referral Tracking for Domain Owners:
- Add user fields: referred_by_user_id, referred_by_domain, referral_code
- Parse yield referral codes (yield_{user_id}_{domain_id}) on registration
- Domain owners earn lifetime commission when visitors sign up via their domain

DB Migrations:
- Add referral tracking columns to users table
2025-12-12 15:27:53 +01:00

525 lines
18 KiB
Python

"""
Intent Detection Engine for Yield Domains.
Analyzes domain names to detect user intent and match with affiliate partners.
Uses keyword matching, pattern detection, and NLP-lite techniques.
"""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class IntentResult:
    """Result of intent detection for a domain.

    Instances are built by ``IntentDetector.detect``; the field order is part
    of the constructor signature and must not be rearranged.
    """

    # Primary intent category (the part of the pattern key before the first
    # underscore, or "generic" when nothing matched).
    category: str
    # More specific subcategory (the rest of the pattern key), if any.
    subcategory: Optional[str]
    # Match confidence in the range 0.0 - 1.0.
    confidence: float
    # Which keywords / patterns triggered the match.
    keywords_matched: list[str]
    # Affiliate partner slugs suggested for this intent.
    suggested_partners: list[str]
    # One of "high", "medium", "low".
    monetization_potential: str
# Intent categories with keywords (Swiss/German/English focus).
#
# Each key has the form "<category>" or "<category>_<subcategory>";
# IntentDetector.detect splits it at the first underscore. Each value holds:
#   "keywords": literal tokens compared against the parts of a domain name
#   "patterns": regex strings applied with re.match (anchored at the start
#               of a domain part)
#   "potential": "high" / "medium" / "low" monetization potential
#   "partners": affiliate partner slugs suggested for the category
#   "pounce_affinity": optional flag; not read anywhere in this module --
#               presumably consumed by other code, confirm before removing
#
# Declaration order matters: the detector only replaces the current best
# category on a strictly higher score, so on a tie the earlier entry wins.
# Several keywords are listed under more than one category (e.g.
# "immobilien", "kaufen", "buy", "invest", "domain", "saas").
INTENT_PATTERNS = {
    # Medical / Health
    "medical_dental": {
        "keywords": [
            "zahnarzt", "dental", "dentist", "zahn", "zähne", "kieferorthopäde",
            "implantate", "zahnklinik", "prothese", "bleaching", "zahnpflege",
            "dentalhygiene", "mundgesundheit", "braces", "orthodontist"
        ],
        "patterns": [r"zahn\w*", r"dent\w*"],
        "potential": "high",
        "partners": ["comparis_dental", "swisssmile", "dentaldeal"]
    },
    "medical_general": {
        "keywords": [
            "arzt", "doctor", "klinik", "clinic", "hospital", "spital",
            "praxis", "gesundheit", "health", "medizin", "medicine",
            "therapie", "therapy", "behandlung", "treatment"
        ],
        "patterns": [r"med\w+", r"gesund\w*", r"health\w*"],
        "potential": "high",
        "partners": ["comparis_health", "sanitas", "helsana"]
    },
    "medical_beauty": {
        "keywords": [
            "schönheit", "beauty", "kosmetik", "cosmetic", "botox",
            "filler", "laser", "aesthetic", "ästhetik", "haut", "skin",
            "anti-aging", "wellness", "spa", "massage"
        ],
        "patterns": [r"beauty\w*", r"kosm\w*"],
        "potential": "high",
        "partners": ["swissesthetic", "beautyfinder"]
    },
    # Finance / Insurance
    "finance_insurance": {
        "keywords": [
            "versicherung", "insurance", "krankenkasse", "autoversicherung",
            "hausrat", "haftpflicht", "lebensversicherung", "police"
        ],
        "patterns": [r"versicher\w*", r"insur\w*"],
        "potential": "high",
        "partners": ["comparis_insurance", "bonus_ch", "financescout"]
    },
    "finance_mortgage": {
        "keywords": [
            "hypothek", "mortgage", "kredit", "credit", "darlehen", "loan",
            "finanzierung", "financing", "immobilien", "eigenheim"
        ],
        "patterns": [r"hypo\w*", r"kredit\w*", r"mortg\w*"],
        "potential": "high",
        "partners": ["comparis_hypo", "moneypark", "hypocenter"]
    },
    "finance_banking": {
        "keywords": [
            "bank", "banking", "konto", "account", "sparen", "savings",
            "anlegen", "invest", "geld", "money", "zinsen", "interest"
        ],
        "patterns": [r"bank\w*", r"finanz\w*"],
        "potential": "medium",
        "partners": ["neon_bank", "yuh_ch"]
    },
    # Legal
    "legal_general": {
        "keywords": [
            "anwalt", "lawyer", "rechtsanwalt", "attorney", "rechtshilfe",
            "legal", "recht", "law", "kanzlei", "advokat", "jurist"
        ],
        "patterns": [r"anwalt\w*", r"recht\w*", r"law\w*"],
        "potential": "high",
        "partners": ["legal_ch", "anwalt24"]
    },
    # Real Estate
    "realestate_buy": {
        "keywords": [
            "immobilien", "realestate", "wohnung", "apartment", "haus", "house",
            "kaufen", "buy", "villa", "eigentum", "property", "liegenschaft"
        ],
        "patterns": [r"immobil\w*", r"wohn\w*"],
        "potential": "high",
        "partners": ["homegate", "immoscout", "comparis_immo"]
    },
    "realestate_rent": {
        "keywords": [
            "mieten", "rent", "miete", "mietwohnung", "rental", "wg",
            "studio", "loft", "untermiete"
        ],
        "patterns": [r"miet\w*", r"rent\w*"],
        "potential": "medium",
        "partners": ["homegate", "flatfox"]
    },
    # Travel
    "travel_flights": {
        "keywords": [
            "flug", "flight", "fliegen", "fly", "airline", "flughafen",
            "airport", "billigflug", "cheapflight", "reise", "travel"
        ],
        "patterns": [r"fl[uy]g\w*", r"travel\w*"],
        "potential": "medium",
        "partners": ["skyscanner", "kayak", "booking"]
    },
    "travel_hotels": {
        "keywords": [
            "hotel", "unterkunft", "accommodation", "hostel", "pension",
            "resort", "übernachtung", "booking", "airbnb"
        ],
        "patterns": [r"hotel\w*"],
        "potential": "medium",
        "partners": ["booking_com", "trivago", "hotels_com"]
    },
    # E-Commerce / Shopping
    "shopping_general": {
        "keywords": [
            "shop", "store", "kaufen", "buy", "einkaufen", "shopping",
            "deals", "rabatt", "discount", "sale", "angebot", "offer"
        ],
        "patterns": [r"shop\w*", r"deal\w*"],
        "potential": "medium",
        "partners": ["amazon_ch", "galaxus", "digitec"]
    },
    "shopping_fashion": {
        "keywords": [
            "mode", "fashion", "kleider", "clothes", "schuhe", "shoes",
            "outfit", "style", "bekleidung", "garderobe"
        ],
        "patterns": [r"mode\w*", r"fash\w*"],
        "potential": "medium",
        "partners": ["zalando", "about_you"]
    },
    # Automotive
    "auto_buy": {
        "keywords": [
            "auto", "car", "fahrzeug", "vehicle", "wagen", "neuwagen",
            "gebrauchtwagen", "occasion", "carmarket", "autohaus"
        ],
        "patterns": [r"auto\w*", r"car\w*"],
        "potential": "high",
        "partners": ["autoscout", "comparis_auto", "carforyou"]
    },
    "auto_service": {
        "keywords": [
            "garage", "werkstatt", "reparatur", "repair", "service",
            "reifenwechsel", "inspektion", "tuning"
        ],
        "patterns": [r"garag\w*"],
        "potential": "medium",
        "partners": ["autobutler"]
    },
    # Jobs / Career
    "jobs": {
        "keywords": [
            "job", "jobs", "karriere", "career", "arbeit", "work",
            "stelle", "stellenangebot", "vacancy", "hiring", "bewerbung"
        ],
        "patterns": [r"job\w*", r"karrier\w*"],
        "potential": "medium",
        "partners": ["jobs_ch", "indeed", "linkedin"]
    },
    # Education
    "education": {
        "keywords": [
            "schule", "school", "uni", "university", "bildung", "education",
            "kurs", "course", "lernen", "learn", "ausbildung", "training",
            "weiterbildung", "studium", "studieren"
        ],
        "patterns": [r"schul\w*", r"edu\w*", r"learn\w*"],
        "potential": "medium",
        "partners": ["udemy", "coursera", "edx"]
    },
    # Technology
    "tech_hosting": {
        "keywords": [
            "hosting", "server", "cloud", "domain", "website", "webhosting",
            "vps", "dedicated", "webspace"
        ],
        "patterns": [r"host\w*", r"server\w*"],
        "potential": "medium",
        "partners": ["hostpoint", "infomaniak", "cyon"]
    },
    "tech_software": {
        "keywords": [
            "software", "app", "tool", "saas", "crm", "erp",
            "programm", "application", "platform"
        ],
        "patterns": [r"soft\w*", r"app\w*"],
        "potential": "medium",
        "partners": ["capterra", "g2"]
    },
    # Investment / Crypto / Finance Tech - HIGH POUNCE CONVERSION
    "investment_domains": {
        "keywords": [
            "invest", "investment", "investor", "portfolio", "asset", "assets",
            "trading", "trader", "crypto", "bitcoin", "blockchain", "nft",
            "domain", "domains", "digital", "passive", "income", "yield",
            "startup", "founder", "entrepreneur", "venture", "capital"
        ],
        "patterns": [r"invest\w*", r"trad\w*", r"crypto\w*", r"domain\w*"],
        "potential": "high",
        "partners": ["pounce_promo"],  # Pounce self-promotion
        "pounce_affinity": True,  # Flag for Pounce self-promotion
    },
    # Tech / Developer - GOOD POUNCE CONVERSION
    "tech_dev": {
        "keywords": [
            "dev", "developer", "code", "coding", "tech", "technology",
            "api", "sdk", "github", "git", "open-source", "opensource",
            "web3", "defi", "fintech", "proptech", "saas"
        ],
        "patterns": [r"dev\w*", r"tech\w*", r"web\d*"],
        "potential": "medium",
        "partners": ["pounce_promo"],
        "pounce_affinity": True,
    },
    # Food / Restaurant
    "food_restaurant": {
        "keywords": [
            "restaurant", "essen", "food", "pizza", "sushi", "burger",
            "cafe", "bistro", "gastronomie", "dining"
        ],
        "patterns": [r"food\w*", r"pizza\w*"],
        "potential": "low",
        "partners": ["eatme", "uber_eats"]
    },
    "food_delivery": {
        "keywords": [
            "lieferung", "delivery", "liefern", "bestellen", "order",
            "takeaway", "takeout"
        ],
        "patterns": [r"deliver\w*", r"liefer\w*"],
        "potential": "medium",
        "partners": ["uber_eats", "just_eat"]
    },
}
# Swiss city tokens used for geo-targeting; any domain part equal to one of
# these marks the domain as Swiss ("CH"). Spelling variants of the same city
# are kept on one line.
# NOTE(review): "zri" is not an obvious abbreviation for Zürich -- possibly
# meant as "zrh" or "zueri"; confirm.
# NOTE(review): "st-gallen" contains a hyphen, but hyphens are turned into
# separators during domain normalization, so this entry looks unreachable.
SWISS_CITIES = {
    "zürich", "zurich", "zuerich", "zri", "zh",
    "bern",
    "genf", "geneva", "geneve",
    "basel",
    "lausanne",
    "luzern", "lucerne",
    "winterthur",
    "stgallen", "st-gallen",
    "lugano",
    "biel", "bienne",
    "thun",
    "köniz",
    "chur",
    "schaffhausen",
    "fribourg", "freiburg",
    "neuchatel", "neuenburg",
    "uster",
    "sion", "sitten",
    "zug",
    "aarau",
    "baden",
    "wil",
    "davos",
    "interlaken",
}
# German city tokens used for geo-targeting; any domain part equal to one of
# these marks the domain as German ("DE"). Umlaut and transliterated
# spellings of the same city share a line.
# NOTE(review): "essen" is also listed as a food keyword in INTENT_PATTERNS,
# so a domain part "essen" counts as both a city and a food term.
GERMAN_CITIES = {
    "berlin",
    "münchen", "munich", "muenchen",
    "hamburg",
    "frankfurt",
    "köln", "koeln",
    "düsseldorf", "duesseldorf",
    "stuttgart",
    "dortmund",
    "essen",
    "leipzig",
    "bremen",
}
class IntentDetector:
    """
    Detects user intent from domain names.

    Uses keyword matching and pattern detection to categorize domains
    and suggest appropriate affiliate partners for monetization.
    """

    def __init__(self):
        # Bind the module-level tables per instance so they can be replaced
        # on a single detector without touching the globals.
        self.patterns = INTENT_PATTERNS
        self.swiss_cities = SWISS_CITIES
        self.german_cities = GERMAN_CITIES

    def detect(self, domain: str) -> "IntentResult":
        """
        Analyze a domain name and detect its intent category.

        Args:
            domain: The domain name (e.g., "zahnarzt-zuerich.ch")

        Returns:
            IntentResult with category, confidence, and partner suggestions.
            When no category reaches a score of 0.5 a "generic" result with
            confidence 0.2 is returned.
        """
        tokens = self._split_domain_parts(self._normalize_domain(domain))

        # Find the best matching category. The strict ">" keeps the
        # first-declared category when two categories tie.
        best_match = None
        best_score = 0.0
        best_keywords: list[str] = []
        for category, config in self.patterns.items():
            score, matched_keywords = self._score_category(tokens, config)
            if score > best_score:
                best_score = score
                best_match = category
                best_keywords = matched_keywords

        # If no strong match, return generic
        if best_score < 0.5 or best_match is None:
            return IntentResult(
                category="generic",
                subcategory=None,
                confidence=0.2,
                keywords_matched=[],
                suggested_partners=["generic_affiliate"],
                monetization_potential="low"
            )

        config = self.patterns[best_match]

        # Split the pattern key into main category and optional subcategory
        # at the first underscore ("medical_dental" -> "medical", "dental").
        main_category, separator, remainder = best_match.partition("_")
        subcategory = remainder if separator else None

        return IntentResult(
            category=main_category,
            subcategory=subcategory,
            # A score of 3.0 or more saturates at full confidence.
            confidence=min(best_score / 3.0, 1.0),
            keywords_matched=best_keywords,
            # Copy so callers cannot mutate the shared pattern table.
            suggested_partners=list(config.get("partners", [])),
            monetization_potential=config.get("potential", "medium")
        )

    def detect_geo(self, domain: str) -> Optional[str]:
        """
        Detect geographic targeting from domain name.

        The TLD is checked first (case-insensitively); only if it is not
        country-specific are the domain parts compared with the city sets.

        Returns:
            ISO country code if detected (e.g., "CH", "DE"), None otherwise
        """
        # Same canonical form as _normalize_domain, so "Example.CH" and a
        # fully-qualified "example.ch." are recognised too.
        host = domain.strip().lower().rstrip(".")

        # Check TLD first
        if host.endswith((".ch", ".swiss")):
            return "CH"
        if host.endswith(".de"):
            return "DE"
        if host.endswith(".at"):
            return "AT"

        # Check city names
        parts = set(self._split_domain_parts(self._normalize_domain(host)))
        if parts & self.swiss_cities:
            return "CH"
        if parts & self.german_cities:
            return "DE"
        return None

    def estimate_value(self, domain: str) -> dict:
        """
        Estimate the monetization value of a domain.

        Returns:
            dict with integer "estimated_monthly_min" / "estimated_monthly_max",
            "currency" ("CHF" for Swiss domains, "EUR" otherwise), and the
            "potential", "confidence" and "geo" the estimate is based on.
        """
        intent = self.detect(domain)
        geo = self.detect_geo(domain)

        # Base value by potential
        base_values = {
            "high": {"min": 50, "max": 500},
            "medium": {"min": 20, "max": 100},
            "low": {"min": 5, "max": 30}
        }
        potential = intent.monetization_potential
        base = base_values.get(potential, base_values["low"])

        # Adjust for geo (Swiss = premium)
        multiplier = 1.5 if geo == "CH" else 1.0

        # Adjust for confidence: scales the range between 50% and 100%.
        confidence_mult = 0.5 + (intent.confidence * 0.5)

        return {
            "estimated_monthly_min": int(base["min"] * multiplier * confidence_mult),
            "estimated_monthly_max": int(base["max"] * multiplier * confidence_mult),
            "currency": "CHF" if geo == "CH" else "EUR",
            "potential": potential,
            "confidence": intent.confidence,
            "geo": geo
        }

    def _normalize_domain(self, domain: str) -> str:
        """Lowercase the domain, drop a known TLD and turn separators into spaces."""
        # Tolerate surrounding whitespace and a trailing root dot so the
        # end-anchored TLD pattern below still applies.
        host = domain.strip().lower().rstrip(".")
        # Remove common TLDs (only the last label is removed)
        host = re.sub(r'\.(com|net|org|ch|de|at|io|co|info|swiss)$', '', host)
        # Replace common separators with space
        host = re.sub(r'[-_.]', ' ', host)
        return host.strip()

    def _split_domain_parts(self, domain_clean: str) -> list[str]:
        """
        Split a normalized domain into the parts used for scoring.

        Every whitespace-separated token is kept; tokens longer than six
        characters are additionally followed by the known keywords they
        contain, so compound words still produce keyword hits.
        """
        expanded = []
        for part in domain_clean.split():
            expanded.append(part)
            # Only longer tokens are searched for embedded keywords.
            if len(part) > 6:
                expanded.extend(self._find_subwords(part))
        return expanded

    def _find_subwords(self, word: str) -> list[str]:
        """
        Return the known keywords contained in ``word`` (excluding ``word``).

        Each keyword is reported once, in first-seen order. The same keyword
        is listed under several categories, and reporting it per category
        would make the scorer count a single occurrence several times.
        """
        seen = set()
        subwords = []
        for config in self.patterns.values():
            for keyword in config.get("keywords", []):
                if keyword != word and keyword in word and keyword not in seen:
                    seen.add(keyword)
                    subwords.append(keyword)
        return subwords

    def _score_category(self, parts: list[str], config: dict) -> tuple[float, list[str]]:
        """
        Score how well domain parts match a category.

        Per part: an exact keyword hit adds 1.0. Otherwise a substring
        relation with any keyword (either direction) adds 0.5, and a regex
        pattern matching at the start of the part adds a further 0.7.

        Returns:
            (score, matched_keywords) where partial hits are recorded as
            "part~keyword" and regex hits as "part@pattern".
        """
        score = 0.0
        matched = []
        # Keep the declared order for the partial-match scan so the reported
        # keyword is stable across runs; the set is only for exact lookups.
        keywords = config.get("keywords", [])
        keyword_set = set(keywords)
        patterns = config.get("patterns", [])

        for part in parts:
            # Exact keyword match
            if part in keyword_set:
                score += 1.0
                matched.append(part)
                continue

            # Partial keyword match (first keyword in declared order wins).
            # NOTE: a very short part can be a substring of a longer keyword.
            partial = next(
                (kw for kw in keywords if kw in part or part in kw), None
            )
            if partial is not None:
                score += 0.5
                matched.append(f"{part}~{partial}")

            # Regex pattern match (re.match anchors at the start of the part)
            hit = next((p for p in patterns if re.match(p, part)), None)
            if hit is not None:
                score += 0.7
                matched.append(f"{part}@{hit}")

        return score, matched
# Singleton instance, created lazily on first use.
_detector = None


def get_intent_detector() -> IntentDetector:
    """Get singleton IntentDetector instance.

    The detector is constructed on the first call and the same object is
    returned on every later call.
    """
    global _detector
    if _detector is None:
        _detector = IntentDetector()
    return _detector
def detect_domain_intent(domain: str) -> IntentResult:
    """Convenience function to detect intent for a domain.

    Args:
        domain: The domain name to analyze.

    Returns:
        The IntentResult produced by the shared IntentDetector instance.
    """
    return get_intent_detector().detect(domain)
def estimate_domain_yield(domain: str) -> dict:
    """Convenience function to estimate yield value for a domain.

    Args:
        domain: The domain name to analyze.

    Returns:
        dict with the input "domain", an "intent" summary (category,
        subcategory, confidence, keywords), the "value" estimate from
        IntentDetector.estimate_value, the suggested "partners" and the
        "monetization_potential".
    """
    detector = get_intent_detector()
    intent = detector.detect(domain)
    # estimate_value runs its own intent detection for the same domain.
    value = detector.estimate_value(domain)
    return {
        "domain": domain,
        "intent": {
            "category": intent.category,
            "subcategory": intent.subcategory,
            "confidence": intent.confidence,
            "keywords": intent.keywords_matched
        },
        "value": value,
        "partners": intent.suggested_partners,
        "monetization_potential": intent.monetization_potential
    }