pounce/backend/app/services/intent_detector.py
yves.gugger 76a118ddbf
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
feat: implement Yield/Intent Routing feature (pounce_endgame)
Backend:
- Add YieldDomain, YieldTransaction, YieldPayout, AffiliatePartner models
- Create IntentDetector service for keyword-based intent classification
- Implement /api/v1/yield/* endpoints (dashboard, domains, transactions, partners)
- Support domain activation, DNS verification, and revenue tracking

Frontend:
- Add /terminal/yield page with dashboard and activate wizard
- Add YIELD to sidebar navigation under 'Monetize' section
- Add 4th pillar 'Yield' to landing page 'Beyond Hunting' section
- Extend API client with yield endpoints and types

Features:
- AI-powered intent detection (medical, finance, legal, realestate, etc.)
- Swiss/German geo-targeting with city recognition
- Revenue estimation based on intent category and geo
- DNS verification via nameservers or CNAME
- 70/30 revenue split tracking
2025-12-12 14:39:56 +01:00

498 lines
16 KiB
Python

"""
Intent Detection Engine for Yield Domains.
Analyzes domain names to detect user intent and match with affiliate partners.
Uses keyword matching, pattern detection, and NLP-lite techniques.
"""
import re
from dataclasses import dataclass
from typing import Optional
@dataclass
class IntentResult:
    """Result of intent detection for a domain.

    Field order is part of the constructor signature; do not reorder.
    """

    # Primary intent category (the part of the pattern key before the
    # first underscore, or "generic" when nothing matched).
    category: str
    # More specific subcategory (the part after the first underscore),
    # or None when the pattern key has no underscore.
    subcategory: Optional[str]
    # Match confidence in the range 0.0 - 1.0.
    confidence: float
    # Which keywords / patterns triggered the match.
    keywords_matched: list[str]
    # Affiliate partner slugs suggested for this intent.
    suggested_partners: list[str]
    # One of "high", "medium", "low".
    monetization_potential: str
# Intent category table (Swiss / German / English vocabulary).
#
# Each key is "<main>" or "<main>_<sub>"; the detector splits on the first
# underscore. Each entry carries:
#   keywords  - literal words compared against the domain parts
#   patterns  - regexes applied with re.match (anchored at the part start)
#   potential - "high" / "medium" / "low" monetization tier
#   partners  - affiliate partner slugs
# Insertion order matters: on equal scores the earlier category wins.
INTENT_PATTERNS = {
    # --- Medical / Health ---------------------------------------------
    "medical_dental": {
        "keywords": [
            "zahnarzt",
            "dental",
            "dentist",
            "zahn",
            "zähne",
            "kieferorthopäde",
            "implantate",
            "zahnklinik",
            "prothese",
            "bleaching",
            "zahnpflege",
            "dentalhygiene",
            "mundgesundheit",
            "braces",
            "orthodontist",
        ],
        "patterns": [r"zahn\w*", r"dent\w*"],
        "potential": "high",
        "partners": ["comparis_dental", "swisssmile", "dentaldeal"],
    },
    "medical_general": {
        "keywords": [
            "arzt",
            "doctor",
            "klinik",
            "clinic",
            "hospital",
            "spital",
            "praxis",
            "gesundheit",
            "health",
            "medizin",
            "medicine",
            "therapie",
            "therapy",
            "behandlung",
            "treatment",
        ],
        "patterns": [r"med\w+", r"gesund\w*", r"health\w*"],
        "potential": "high",
        "partners": ["comparis_health", "sanitas", "helsana"],
    },
    "medical_beauty": {
        "keywords": [
            "schönheit",
            "beauty",
            "kosmetik",
            "cosmetic",
            "botox",
            "filler",
            "laser",
            "aesthetic",
            "ästhetik",
            "haut",
            "skin",
            "anti-aging",
            "wellness",
            "spa",
            "massage",
        ],
        "patterns": [r"beauty\w*", r"kosm\w*"],
        "potential": "high",
        "partners": ["swissesthetic", "beautyfinder"],
    },
    # --- Finance / Insurance ------------------------------------------
    "finance_insurance": {
        "keywords": [
            "versicherung",
            "insurance",
            "krankenkasse",
            "autoversicherung",
            "hausrat",
            "haftpflicht",
            "lebensversicherung",
            "police",
        ],
        "patterns": [r"versicher\w*", r"insur\w*"],
        "potential": "high",
        "partners": ["comparis_insurance", "bonus_ch", "financescout"],
    },
    "finance_mortgage": {
        "keywords": [
            "hypothek",
            "mortgage",
            "kredit",
            "credit",
            "darlehen",
            "loan",
            "finanzierung",
            "financing",
            "immobilien",
            "eigenheim",
        ],
        "patterns": [r"hypo\w*", r"kredit\w*", r"mortg\w*"],
        "potential": "high",
        "partners": ["comparis_hypo", "moneypark", "hypocenter"],
    },
    "finance_banking": {
        "keywords": [
            "bank",
            "banking",
            "konto",
            "account",
            "sparen",
            "savings",
            "anlegen",
            "invest",
            "geld",
            "money",
            "zinsen",
            "interest",
        ],
        "patterns": [r"bank\w*", r"finanz\w*"],
        "potential": "medium",
        "partners": ["neon_bank", "yuh_ch"],
    },
    # --- Legal --------------------------------------------------------
    "legal_general": {
        "keywords": [
            "anwalt",
            "lawyer",
            "rechtsanwalt",
            "attorney",
            "rechtshilfe",
            "legal",
            "recht",
            "law",
            "kanzlei",
            "advokat",
            "jurist",
        ],
        "patterns": [r"anwalt\w*", r"recht\w*", r"law\w*"],
        "potential": "high",
        "partners": ["legal_ch", "anwalt24"],
    },
    # --- Real Estate --------------------------------------------------
    "realestate_buy": {
        "keywords": [
            "immobilien",
            "realestate",
            "wohnung",
            "apartment",
            "haus",
            "house",
            "kaufen",
            "buy",
            "villa",
            "eigentum",
            "property",
            "liegenschaft",
        ],
        "patterns": [r"immobil\w*", r"wohn\w*"],
        "potential": "high",
        "partners": ["homegate", "immoscout", "comparis_immo"],
    },
    "realestate_rent": {
        "keywords": [
            "mieten",
            "rent",
            "miete",
            "mietwohnung",
            "rental",
            "wg",
            "studio",
            "loft",
            "untermiete",
        ],
        "patterns": [r"miet\w*", r"rent\w*"],
        "potential": "medium",
        "partners": ["homegate", "flatfox"],
    },
    # --- Travel -------------------------------------------------------
    "travel_flights": {
        "keywords": [
            "flug",
            "flight",
            "fliegen",
            "fly",
            "airline",
            "flughafen",
            "airport",
            "billigflug",
            "cheapflight",
            "reise",
            "travel",
        ],
        "patterns": [r"fl[uy]g\w*", r"travel\w*"],
        "potential": "medium",
        "partners": ["skyscanner", "kayak", "booking"],
    },
    "travel_hotels": {
        "keywords": [
            "hotel",
            "unterkunft",
            "accommodation",
            "hostel",
            "pension",
            "resort",
            "übernachtung",
            "booking",
            "airbnb",
        ],
        "patterns": [r"hotel\w*"],
        "potential": "medium",
        "partners": ["booking_com", "trivago", "hotels_com"],
    },
    # --- E-Commerce / Shopping ----------------------------------------
    "shopping_general": {
        "keywords": [
            "shop",
            "store",
            "kaufen",
            "buy",
            "einkaufen",
            "shopping",
            "deals",
            "rabatt",
            "discount",
            "sale",
            "angebot",
            "offer",
        ],
        "patterns": [r"shop\w*", r"deal\w*"],
        "potential": "medium",
        "partners": ["amazon_ch", "galaxus", "digitec"],
    },
    "shopping_fashion": {
        "keywords": [
            "mode",
            "fashion",
            "kleider",
            "clothes",
            "schuhe",
            "shoes",
            "outfit",
            "style",
            "bekleidung",
            "garderobe",
        ],
        "patterns": [r"mode\w*", r"fash\w*"],
        "potential": "medium",
        "partners": ["zalando", "about_you"],
    },
    # --- Automotive ---------------------------------------------------
    "auto_buy": {
        "keywords": [
            "auto",
            "car",
            "fahrzeug",
            "vehicle",
            "wagen",
            "neuwagen",
            "gebrauchtwagen",
            "occasion",
            "carmarket",
            "autohaus",
        ],
        "patterns": [r"auto\w*", r"car\w*"],
        "potential": "high",
        "partners": ["autoscout", "comparis_auto", "carforyou"],
    },
    "auto_service": {
        "keywords": [
            "garage",
            "werkstatt",
            "reparatur",
            "repair",
            "service",
            "reifenwechsel",
            "inspektion",
            "tuning",
        ],
        "patterns": [r"garag\w*"],
        "potential": "medium",
        "partners": ["autobutler"],
    },
    # --- Jobs / Career ------------------------------------------------
    "jobs": {
        "keywords": [
            "job",
            "jobs",
            "karriere",
            "career",
            "arbeit",
            "work",
            "stelle",
            "stellenangebot",
            "vacancy",
            "hiring",
            "bewerbung",
        ],
        "patterns": [r"job\w*", r"karrier\w*"],
        "potential": "medium",
        "partners": ["jobs_ch", "indeed", "linkedin"],
    },
    # --- Education ----------------------------------------------------
    "education": {
        "keywords": [
            "schule",
            "school",
            "uni",
            "university",
            "bildung",
            "education",
            "kurs",
            "course",
            "lernen",
            "learn",
            "ausbildung",
            "training",
            "weiterbildung",
            "studium",
            "studieren",
        ],
        "patterns": [r"schul\w*", r"edu\w*", r"learn\w*"],
        "potential": "medium",
        "partners": ["udemy", "coursera", "edx"],
    },
    # --- Technology ---------------------------------------------------
    "tech_hosting": {
        "keywords": [
            "hosting",
            "server",
            "cloud",
            "domain",
            "website",
            "webhosting",
            "vps",
            "dedicated",
            "webspace",
        ],
        "patterns": [r"host\w*", r"server\w*"],
        "potential": "medium",
        "partners": ["hostpoint", "infomaniak", "cyon"],
    },
    "tech_software": {
        "keywords": [
            "software",
            "app",
            "tool",
            "saas",
            "crm",
            "erp",
            "programm",
            "application",
            "platform",
        ],
        "patterns": [r"soft\w*", r"app\w*"],
        "potential": "medium",
        "partners": ["capterra", "g2"],
    },
    # --- Food / Restaurant --------------------------------------------
    "food_restaurant": {
        "keywords": [
            "restaurant",
            "essen",
            "food",
            "pizza",
            "sushi",
            "burger",
            "cafe",
            "bistro",
            "gastronomie",
            "dining",
        ],
        "patterns": [r"food\w*", r"pizza\w*"],
        "potential": "low",
        "partners": ["eatme", "uber_eats"],
    },
    "food_delivery": {
        "keywords": [
            "lieferung",
            "delivery",
            "liefern",
            "bestellen",
            "order",
            "takeaway",
            "takeout",
        ],
        "patterns": [r"deliver\w*", r"liefer\w*"],
        "potential": "medium",
        "partners": ["uber_eats", "just_eat"],
    },
}
# Swiss place names (with spelling variants and abbreviations) used for
# geo-targeting. Entries are compared against individual domain parts.
# NOTE(review): the detector splits domains on "-", so the hyphenated
# "st-gallen" entry looks unreachable as a single part - confirm.
SWISS_CITIES = {
    # Zurich
    "zürich", "zurich", "zuerich", "zri", "zh",
    # Bern
    "bern",
    # Geneva
    "genf", "geneva", "geneve",
    # Basel / Lausanne
    "basel", "lausanne",
    # Lucerne
    "luzern", "lucerne",
    # Winterthur
    "winterthur",
    # St. Gallen
    "stgallen", "st-gallen",
    # Lugano
    "lugano",
    # Biel/Bienne
    "biel", "bienne",
    # Thun / Köniz / Chur / Schaffhausen
    "thun", "köniz", "chur", "schaffhausen",
    # Fribourg
    "fribourg", "freiburg",
    # Neuchâtel
    "neuchatel", "neuenburg",
    # Uster
    "uster",
    # Sion
    "sion", "sitten",
    # Smaller towns
    "zug", "aarau", "baden", "wil", "davos", "interlaken",
}
# German city names (with spelling variants) used for geo-targeting.
# Entries are compared against individual domain parts.
GERMAN_CITIES = {
    "berlin",
    # Munich
    "münchen", "munich", "muenchen",
    "hamburg",
    "frankfurt",
    # Cologne
    "köln", "koeln",
    # Düsseldorf
    "düsseldorf", "duesseldorf",
    "stuttgart",
    "dortmund",
    "essen",
    "leipzig",
    "bremen",
}
class IntentDetector:
    """
    Detects user intent from domain names.

    Uses keyword matching and pattern detection to categorize domains
    and suggest appropriate affiliate partners for monetization.
    """

    # A domain part shorter than this is never matched as a substring *of*
    # a keyword. Without the guard a one-letter label such as "x" scores a
    # partial hit against any keyword containing that letter ("botox"),
    # which is enough to clear the 0.5 detection threshold on its own.
    _MIN_REVERSE_MATCH_LEN = 3

    def __init__(self):
        self.patterns = INTENT_PATTERNS
        self.swiss_cities = SWISS_CITIES
        self.german_cities = GERMAN_CITIES

    def detect(self, domain: str) -> "IntentResult":
        """
        Analyze a domain name and detect its intent category.

        Args:
            domain: The domain name (e.g., "zahnarzt-zuerich.ch")

        Returns:
            IntentResult with category, confidence, and partner suggestions.
            When no category scores at least 0.5 a "generic" result with
            confidence 0.2 is returned.
        """
        domain_clean = self._normalize_domain(domain)
        parts = self._split_domain_parts(domain_clean)

        # Find the best matching category. Strict ">" means the category
        # listed first in the pattern table wins ties.
        best_match = None
        best_score = 0.0
        best_keywords: list[str] = []
        for category, config in self.patterns.items():
            score, matched_keywords = self._score_category(parts, config)
            if score > best_score:
                best_score = score
                best_match = category
                best_keywords = matched_keywords

        # If no strong match, return generic
        if best_match is None or best_score < 0.5:
            return IntentResult(
                category="generic",
                subcategory=None,
                confidence=0.2,
                keywords_matched=[],
                suggested_partners=["generic_affiliate"],
                monetization_potential="low",
            )

        # A score of 3.0 or more maps to full confidence.
        confidence = min(best_score / 3.0, 1.0)
        config = self.patterns[best_match]

        # "medical_dental" -> ("medical", "dental"); "jobs" -> ("jobs", None)
        main_category, sep, remainder = best_match.partition("_")
        subcategory = remainder if sep else None

        return IntentResult(
            category=main_category,
            subcategory=subcategory,
            confidence=confidence,
            keywords_matched=best_keywords,
            suggested_partners=config.get("partners", []),
            monetization_potential=config.get("potential", "medium"),
        )

    def detect_geo(self, domain: str) -> Optional[str]:
        """
        Detect geographic targeting from domain name.

        The TLD is checked first and takes precedence over city names
        found in the domain parts.

        Returns:
            ISO country code if detected (e.g., "CH", "DE"), None otherwise
        """
        # Compare case-insensitively, matching what _normalize_domain does;
        # otherwise "Example.CH" would miss the TLD rule.
        domain_lower = domain.strip().lower()

        if domain_lower.endswith((".ch", ".swiss")):
            return "CH"
        if domain_lower.endswith(".de"):
            return "DE"
        if domain_lower.endswith(".at"):
            return "AT"

        # Fall back to city names among the domain parts
        domain_clean = self._normalize_domain(domain_lower)
        parts = set(self._split_domain_parts(domain_clean))
        if parts & self.swiss_cities:
            return "CH"
        if parts & self.german_cities:
            return "DE"
        return None

    def estimate_value(self, domain: str) -> dict:
        """
        Estimate the monetization value of a domain.

        Returns:
            dict with keys "estimated_monthly_min", "estimated_monthly_max"
            (truncated to int), "currency" ("CHF" for CH, otherwise "EUR"),
            "potential", "confidence" and "geo".
        """
        intent = self.detect(domain)
        geo = self.detect_geo(domain)

        # Base value range by monetization potential
        base_values = {
            "high": {"min": 50, "max": 500},
            "medium": {"min": 20, "max": 100},
            "low": {"min": 5, "max": 30},
        }
        potential = intent.monetization_potential
        base = base_values.get(potential, base_values["low"])

        # Adjust for geo (Swiss = premium)
        multiplier = 1.5 if geo == "CH" else 1.0
        # Confidence scales the estimate between 50% and 100% of the base
        confidence_mult = 0.5 + (intent.confidence * 0.5)

        return {
            "estimated_monthly_min": int(base["min"] * multiplier * confidence_mult),
            "estimated_monthly_max": int(base["max"] * multiplier * confidence_mult),
            "currency": "CHF" if geo == "CH" else "EUR",
            "potential": potential,
            "confidence": intent.confidence,
            "geo": geo,
        }

    def _normalize_domain(self, domain: str) -> str:
        """Remove a known trailing TLD and turn separators into spaces."""
        # Strip whitespace first so the "$"-anchored TLD regex can match
        # inputs that carry stray leading/trailing spaces.
        domain = re.sub(
            r'\.(com|net|org|ch|de|at|io|co|info|swiss)$',
            '',
            domain.strip().lower(),
        )
        # Replace common separators with space
        domain = re.sub(r'[-_.]', ' ', domain)
        return domain.strip()

    def _split_domain_parts(self, domain_clean: str) -> list[str]:
        """Split a normalized domain into parts, expanding compound words.

        Every whitespace-separated part is kept; parts longer than six
        characters are additionally followed by any known keywords they
        contain (see _find_subwords).
        """
        expanded = []
        for part in domain_clean.split():
            expanded.append(part)
            if len(part) > 6:
                expanded.extend(self._find_subwords(part))
        return expanded

    def _find_subwords(self, word: str) -> list[str]:
        """Return the known keywords contained in ``word`` (excluding itself).

        Each keyword is reported once, in pattern-table order, even when it
        is listed under several categories; duplicates would otherwise be
        scored multiple times by _score_category.
        """
        candidates = (
            keyword
            for config in self.patterns.values()
            for keyword in config["keywords"]
            if keyword != word and keyword in word
        )
        # dict.fromkeys de-duplicates while preserving first-seen order
        return list(dict.fromkeys(candidates))

    def _score_category(self, parts: list[str], config: dict) -> tuple[float, list[str]]:
        """
        Score how well domain parts match a category.

        Per part: an exact keyword hit scores 1.0 and ends processing of
        that part; otherwise the first partial keyword hit scores 0.5 and,
        independently, the first matching regex pattern scores 0.7.

        Returns (score, matched_keywords)
        """
        score = 0.0
        matched = []
        keyword_list = config.get("keywords", [])
        # Set for O(1) exact lookups; partial matching walks the list so
        # the recorded "part~keyword" label is deterministic across runs
        # (set iteration order of strings varies with the hash seed).
        keyword_set = set(keyword_list)
        patterns = config.get("patterns", [])

        for part in parts:
            # Exact keyword match
            if part in keyword_set:
                score += 1.0
                matched.append(part)
                continue

            # Partial keyword match (keyword inside part, or a sufficiently
            # long part inside a keyword)
            allow_reverse = len(part) >= self._MIN_REVERSE_MATCH_LEN
            for kw in keyword_list:
                if kw in part or (allow_reverse and part in kw):
                    score += 0.5
                    matched.append(f"{part}~{kw}")
                    break

            # Regex pattern match (anchored at the start of the part)
            for pattern in patterns:
                if re.match(pattern, part):
                    score += 0.7
                    matched.append(f"{part}@{pattern}")
                    break

        return score, matched
# Module-level singleton, created lazily on first use.
_detector = None


def get_intent_detector() -> IntentDetector:
    """Return the shared IntentDetector, constructing it on the first call."""
    global _detector
    if _detector is None:
        _detector = IntentDetector()
    return _detector
def detect_domain_intent(domain: str) -> IntentResult:
    """Convenience function: detect intent for ``domain`` via the shared detector."""
    detector = get_intent_detector()
    return detector.detect(domain)
def estimate_domain_yield(domain: str) -> dict:
    """Convenience function to estimate yield value for a domain.

    Returns:
        dict with the input "domain", an "intent" summary (category,
        subcategory, confidence, keywords), the "value" estimate produced
        by IntentDetector.estimate_value, the suggested "partners" and the
        "monetization_potential" tier.
    """
    detector = get_intent_detector()
    intent = detector.detect(domain)
    value = detector.estimate_value(domain)

    intent_summary = {
        "category": intent.category,
        "subcategory": intent.subcategory,
        "confidence": intent.confidence,
        "keywords": intent.keywords_matched,
    }
    return {
        "domain": domain,
        "intent": intent_summary,
        "value": value,
        "partners": intent.suggested_partners,
        "monetization_potential": intent.monetization_potential,
    }