pounce/backend/app/services/seo_analyzer.py
yves.gugger bd1f81a804 feat: Marketplace navigation + SEO fix + tab-based listings
MARKETPLACE INTEGRATION:
- Added 'Marketplace' (/buy) to public Header navigation
- Renamed 'For Sale' to 'Marketplace' in Command Center Sidebar

LISTINGS PAGE REDESIGN:
- Added tab-based layout: 'Browse Marketplace' / 'My Listings'
- Browse tab: Search + grid view of all public listings
- My Listings tab: Full management with stats
- Unified experience for browsing the marketplace and managing your own listings

SEO JUICE DETECTOR FIX:
- Fixed 500 error when database table doesn't exist
- Added fallback: _format_dict_response for when the DB is unavailable
- Service now gracefully handles missing tables
- Returns estimated data even on cache failures
2025-12-10 12:05:49 +01:00

"""
SEO Analyzer Service - "SEO Juice Detector"
This implements Strategie 3 from analysis_3.md:
"SEO-Agenturen suchen Domains wegen der Power (Backlinks).
Solche Domains sind für SEOs 100€ - 500€ wert, auch wenn der Name hässlich ist."
Data Sources (in priority order):
1. Moz API (if MOZ_ACCESS_ID and MOZ_SECRET_KEY are set)
2. CommonCrawl Index (free, but limited)
3. Estimation based on domain characteristics
This is a TYCOON-ONLY feature.
"""
import os
import logging
import base64
import hashlib
import hmac
import time
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List

import httpx
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.seo_data import DomainSEOData

logger = logging.getLogger(__name__)
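
# For reference, analyze_domain() returns a dict shaped like this (illustrative
# values from the estimation path for "example.com"; keys mirror
# _format_response / _format_dict_response below):
#
#     {
#         'domain': 'example.com',
#         'seo_score': 25,
#         'value_category': 'Low Value',
#         'metrics': {
#             'domain_authority': 25, 'page_authority': 20, 'spam_score': 5,
#             'total_backlinks': 1250, 'referring_domains': 125,
#         },
#         'notable_links': {
#             'has_wikipedia': False, 'has_gov': False, 'has_edu': False,
#             'has_news': False, 'notable_domains': [],
#         },
#         'top_backlinks': [],
#         'estimated_value': 50,
#         'data_source': 'estimated',
#         'last_updated': '<iso-8601 timestamp>',
#         'is_estimated': True,
#     }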

class SEOAnalyzerService:
    """
    Analyzes domains for SEO value (backlinks, authority, etc.)

    From analysis_3.md:
    "The domain `alte-bäckerei-münchen.de` is available.
    It has links from `sueddeutsche.de` and `wikipedia.org`."
    """

    # Moz API configuration
    MOZ_API_URL = "https://lsapi.seomoz.com/v2/url_metrics"
    MOZ_LINKS_URL = "https://lsapi.seomoz.com/v2/links"

    # Cache duration (7 days for SEO data)
    CACHE_DURATION_DAYS = 7

    # Known high-authority domains for notable link detection
    NOTABLE_DOMAINS = {
        'wikipedia': ['wikipedia.org', 'wikimedia.org'],
        'gov': ['.gov', '.gov.uk', '.admin.ch', '.bund.de'],
        'edu': ['.edu', '.ac.uk', '.ethz.ch', '.uzh.ch'],
        'news': [
            'nytimes.com', 'theguardian.com', 'bbc.com', 'cnn.com',
            'forbes.com', 'bloomberg.com', 'reuters.com', 'techcrunch.com',
            'spiegel.de', 'faz.net', 'nzz.ch', 'tagesanzeiger.ch'
        ]
    }

    def __init__(self):
        self.moz_access_id = os.getenv('MOZ_ACCESS_ID')
        self.moz_secret_key = os.getenv('MOZ_SECRET_KEY')
        self.has_moz = bool(self.moz_access_id and self.moz_secret_key)
        if self.has_moz:
            logger.info("SEO Analyzer: Moz API configured")
        else:
            logger.warning("SEO Analyzer: No Moz API keys - using estimation mode")

    async def analyze_domain(
        self,
        domain: str,
        db: AsyncSession,
        force_refresh: bool = False
    ) -> Dict[str, Any]:
        """
        Analyze a domain for SEO value.

        Returns:
            Dict with SEO metrics, backlinks, and value estimate
        """
        domain = domain.lower().strip()
        try:
            # Check cache first
            if not force_refresh:
                try:
                    cached = await self._get_cached(domain, db)
                    if cached and not cached.is_expired:
                        return self._format_response(cached)
                except Exception as e:
                    # Table might not exist yet
                    logger.warning(f"Cache check failed (table may not exist): {e}")
                    # Reset the failed transaction so the session stays usable
                    await db.rollback()

            # Fetch fresh data
            if self.has_moz:
                seo_data = await self._fetch_moz_data(domain)
            else:
                seo_data = await self._estimate_seo_data(domain)

            # Try to save to cache (may fail if the table doesn't exist)
            try:
                cached = await self._save_to_cache(domain, seo_data, db)
                return self._format_response(cached)
            except Exception as e:
                logger.warning(f"Cache save failed (table may not exist): {e}")
                await db.rollback()
                # Return data directly without caching
                return self._format_dict_response(domain, seo_data)
        except Exception as e:
            logger.error(f"SEO analysis failed for {domain}: {e}")
            # Return estimated data on any error
            seo_data = await self._estimate_seo_data(domain)
            return self._format_dict_response(domain, seo_data)

    async def _get_cached(self, domain: str, db: AsyncSession) -> Optional[DomainSEOData]:
        """Get cached SEO data for a domain."""
        result = await db.execute(
            select(DomainSEOData).where(DomainSEOData.domain == domain)
        )
        return result.scalar_one_or_none()

    async def _save_to_cache(
        self,
        domain: str,
        data: Dict[str, Any],
        db: AsyncSession
    ) -> DomainSEOData:
        """Save SEO data to cache."""
        # Check if exists
        result = await db.execute(
            select(DomainSEOData).where(DomainSEOData.domain == domain)
        )
        cached = result.scalar_one_or_none()
        if cached:
            # Update existing
            for key, value in data.items():
                if hasattr(cached, key):
                    setattr(cached, key, value)
            cached.last_updated = datetime.utcnow()
            cached.expires_at = datetime.utcnow() + timedelta(days=self.CACHE_DURATION_DAYS)
            cached.fetch_count += 1
        else:
            # Create new
            cached = DomainSEOData(
                domain=domain,
                expires_at=datetime.utcnow() + timedelta(days=self.CACHE_DURATION_DAYS),
                **data
            )
            db.add(cached)
        await db.commit()
        await db.refresh(cached)
        return cached

    async def _fetch_moz_data(self, domain: str) -> Dict[str, Any]:
        """Fetch SEO data from Moz API."""
        try:
            # Generate authentication
            expires = int(time.time()) + 300
            string_to_sign = f"{self.moz_access_id}\n{expires}"
            signature = base64.b64encode(
                hmac.new(
                    self.moz_secret_key.encode('utf-8'),
                    string_to_sign.encode('utf-8'),
                    hashlib.sha1
                ).digest()
            ).decode('utf-8')
            auth_params = {
                'AccessID': self.moz_access_id,
                'Expires': expires,
                'Signature': signature
            }

            async with httpx.AsyncClient(timeout=30) as client:
                # Get URL metrics
                response = await client.post(
                    self.MOZ_API_URL,
                    params=auth_params,
                    json={
                        'targets': [f'http://{domain}/'],
                    }
                )
                if response.status_code == 200:
                    metrics = response.json()
                    if metrics and 'results' in metrics and metrics['results']:
                        result = metrics['results'][0]
                        # Extract notable backlinks
                        top_backlinks = await self._fetch_top_backlinks(
                            domain, auth_params, client
                        )
                        return {
                            'domain_authority': result.get('domain_authority', 0),
                            'page_authority': result.get('page_authority', 0),
                            'spam_score': result.get('spam_score', 0),
                            'total_backlinks': result.get('external_links_to_root_domain', 0),
                            'referring_domains': result.get('root_domains_to_root_domain', 0),
                            'top_backlinks': top_backlinks,
                            'notable_backlinks': self._extract_notable(top_backlinks),
                            'has_wikipedia_link': self._has_notable_link(top_backlinks, 'wikipedia'),
                            'has_gov_link': self._has_notable_link(top_backlinks, 'gov'),
                            'has_edu_link': self._has_notable_link(top_backlinks, 'edu'),
                            'has_news_link': self._has_notable_link(top_backlinks, 'news'),
                            'seo_value_estimate': self._calculate_seo_value(result),
                            'data_source': 'moz',
                        }
                logger.warning(f"Moz API returned {response.status_code} for {domain}")
        except Exception as e:
            logger.error(f"Moz API error for {domain}: {e}")

        # Fallback to estimation
        return await self._estimate_seo_data(domain)

    async def _fetch_top_backlinks(
        self,
        domain: str,
        auth_params: dict,
        client: httpx.AsyncClient
    ) -> List[Dict[str, Any]]:
        """Fetch top backlinks from Moz."""
        try:
            response = await client.post(
                self.MOZ_LINKS_URL,
                params=auth_params,
                json={
                    'target': f'http://{domain}/',
                    'target_scope': 'root_domain',
                    'filter': 'external+nofollow',
                    'sort': 'domain_authority',
                    'limit': 20
                }
            )
            if response.status_code == 200:
                data = response.json()
                if 'results' in data:
                    return [
                        {
                            'domain': link.get('source', {}).get('root_domain', ''),
                            'authority': link.get('source', {}).get('domain_authority', 0),
                            'page': link.get('source', {}).get('page', ''),
                        }
                        for link in data['results'][:10]
                    ]
        except Exception as e:
            logger.error(f"Error fetching backlinks: {e}")
        return []

    async def _estimate_seo_data(self, domain: str) -> Dict[str, Any]:
        """
        Estimate SEO data when no API is available.
        Uses heuristics based on domain characteristics.
        """
        # Extract domain parts
        parts = domain.split('.')
        name = parts[0] if parts else domain
        tld = parts[-1] if len(parts) > 1 else ''

        # Estimate domain authority based on characteristics
        estimated_da = 10  # Base

        # Short domains tend to have more backlinks
        if len(name) <= 4:
            estimated_da += 15
        elif len(name) <= 6:
            estimated_da += 10
        elif len(name) <= 8:
            estimated_da += 5

        # Premium TLDs
        premium_tlds = {'com': 10, 'org': 8, 'net': 5, 'io': 7, 'ai': 8, 'co': 6}
        estimated_da += premium_tlds.get(tld, 0)

        # Common tech keywords get a boost
        common_words = ['tech', 'app', 'data', 'cloud', 'web', 'net', 'hub', 'lab', 'dev']
        if any(word in name.lower() for word in common_words):
            estimated_da += 5

        # Cap at a reasonable estimate
        estimated_da = min(40, estimated_da)

        # Estimate backlinks based on DA
        estimated_backlinks = estimated_da * 50
        estimated_referring = estimated_da * 5

        return {
            'domain_authority': estimated_da,
            'page_authority': max(0, estimated_da - 5),
            'spam_score': 5,  # Assume low spam for estimates
            'total_backlinks': estimated_backlinks,
            'referring_domains': estimated_referring,
            'top_backlinks': [],
            'notable_backlinks': None,
            'has_wikipedia_link': False,
            'has_gov_link': False,
            'has_edu_link': False,
            'has_news_link': False,
            'seo_value_estimate': self._estimate_value(estimated_da),
            'data_source': 'estimated',
        }
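
    # Worked example of the heuristic above, following the code exactly:
    # "data.io" -> base 10 + 15 (name <= 4 chars) + 7 ('io' TLD)
    # + 5 ('data' keyword) = DA 37, i.e. ~1850 backlinks, ~185 referring
    # domains, and a $100 value estimate via _estimate_value(37).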

    def _has_notable_link(self, backlinks: List[Dict], category: str) -> bool:
        """Check if backlinks contain notable sources."""
        domains_to_check = self.NOTABLE_DOMAINS.get(category, [])
        for link in backlinks:
            link_domain = link.get('domain', '').lower()
            for notable in domains_to_check:
                if notable in link_domain:
                    return True
        return False

    def _extract_notable(self, backlinks: List[Dict]) -> Optional[str]:
        """Extract notable backlink domains as comma-separated string."""
        notable = []
        for link in backlinks:
            domain = link.get('domain', '')
            authority = link.get('authority', 0)
            # Include high-authority links
            if authority >= 50:
                notable.append(domain)
        return ','.join(notable[:10]) if notable else None

    def _calculate_seo_value(self, metrics: Dict) -> float:
        """Calculate estimated SEO value in USD."""
        da = metrics.get('domain_authority', 0)
        backlinks = metrics.get('external_links_to_root_domain', 0)

        # Base value from DA
        if da >= 60:
            base_value = 500
        elif da >= 40:
            base_value = 200
        elif da >= 20:
            base_value = 50
        else:
            base_value = 10

        # Boost for backlinks
        link_boost = min(backlinks / 100, 10) * 20
        return round(base_value + link_boost, 2)
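
    # Worked example: DA 45 with 600 external backlinks ->
    # base 200 + min(600 / 100, 10) * 20 = 200 + 120 = 320.00 USD.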

    def _estimate_value(self, da: int) -> float:
        """Estimate value based on estimated DA."""
        if da >= 40:
            return 200
        elif da >= 30:
            return 100
        elif da >= 20:
            return 50
        return 20

    def _format_response(self, data: DomainSEOData) -> Dict[str, Any]:
        """Format SEO data for API response."""
        return {
            'domain': data.domain,
            'seo_score': data.seo_score,
            'value_category': data.value_category,
            'metrics': {
                'domain_authority': data.domain_authority,
                'page_authority': data.page_authority,
                'spam_score': data.spam_score,
                'total_backlinks': data.total_backlinks,
                'referring_domains': data.referring_domains,
            },
            'notable_links': {
                'has_wikipedia': data.has_wikipedia_link,
                'has_gov': data.has_gov_link,
                'has_edu': data.has_edu_link,
                'has_news': data.has_news_link,
                'notable_domains': data.notable_backlinks.split(',') if data.notable_backlinks else [],
            },
            'top_backlinks': data.top_backlinks or [],
            'estimated_value': data.seo_value_estimate,
            'data_source': data.data_source,
            'last_updated': data.last_updated.isoformat() if data.last_updated else None,
            'is_estimated': data.data_source == 'estimated',
        }

    def _format_dict_response(self, domain: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Format SEO data from dict (when DB is not available)."""
        da = data.get('domain_authority', 0) or 0

        # Calculate SEO score
        seo_score = da
        if data.get('has_wikipedia_link'):
            seo_score = min(100, seo_score + 10)
        if data.get('has_gov_link'):
            seo_score = min(100, seo_score + 5)
        if data.get('has_edu_link'):
            seo_score = min(100, seo_score + 5)
        if data.get('has_news_link'):
            seo_score = min(100, seo_score + 3)

        # Determine value category
        if seo_score >= 60:
            value_category = "High Value"
        elif seo_score >= 40:
            value_category = "Medium Value"
        elif seo_score >= 20:
            value_category = "Low Value"
        else:
            value_category = "Minimal"

        return {
            'domain': domain,
            'seo_score': seo_score,
            'value_category': value_category,
            'metrics': {
                'domain_authority': data.get('domain_authority'),
                'page_authority': data.get('page_authority'),
                'spam_score': data.get('spam_score'),
                'total_backlinks': data.get('total_backlinks'),
                'referring_domains': data.get('referring_domains'),
            },
            'notable_links': {
                'has_wikipedia': data.get('has_wikipedia_link', False),
                'has_gov': data.get('has_gov_link', False),
                'has_edu': data.get('has_edu_link', False),
                'has_news': data.get('has_news_link', False),
                'notable_domains': data.get('notable_backlinks', '').split(',') if data.get('notable_backlinks') else [],
            },
            'top_backlinks': data.get('top_backlinks', []),
            'estimated_value': data.get('seo_value_estimate'),
            'data_source': data.get('data_source', 'estimated'),
            'last_updated': datetime.utcnow().isoformat(),
            'is_estimated': data.get('data_source') == 'estimated',
        }
# Singleton instance
seo_analyzer = SEOAnalyzerService()
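
# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal way to exercise the singleton from an async context. The session
# factory name and import path below are assumptions and must be adapted to
# this project's actual database setup:
#
#     import asyncio
#     from app.core.database import async_session  # hypothetical factory
#
#     async def main():
#         async with async_session() as db:
#             report = await seo_analyzer.analyze_domain("example.com", db)
#             print(report["seo_score"], report["value_category"])
#
#     asyncio.run(main())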