MARKETPLACE INTEGRATION:
- Added 'Marketplace' (/buy) to the public Header navigation
- Renamed 'For Sale' to 'Marketplace' in the Command Center Sidebar

LISTINGS PAGE REDESIGN:
- Added tab-based layout: 'Browse Marketplace' / 'My Listings'
- Browse tab: search + grid view of all public listings
- My Listings tab: full management with stats
- Unified experience to view the marketplace and manage own listings

SEO JUICE DETECTOR FIX:
- Fixed 500 error when the database table doesn't exist
- Added fallback: _format_dict_response for when the DB is unavailable
- Service now gracefully handles missing tables
- Returns estimated data even on cache failures (see the caller sketch below)
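A minimal sketch of the caller-side effect of the SEO Juice Detector fix. The service module path, wrapper function name, and session handling here are assumptions for illustration; only seo_analyzer.analyze_domain comes from the file below.

    # Hypothetical caller of the service defined in the file below.
    from sqlalchemy.ext.asyncio import AsyncSession

    from app.services.seo_analyzer import seo_analyzer  # assumed module path

    async def get_seo_report(domain: str, db: AsyncSession) -> dict:
        # Even when the SEO cache table has not been created yet, analyze_domain
        # now logs a warning and returns estimated data (data_source="estimated")
        # instead of raising, so the endpoint no longer responds with a 500.
        return await seo_analyzer.analyze_domain(domain, db)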
447 lines · 17 KiB · Python
"""
|
|
SEO Analyzer Service - "SEO Juice Detector"
|
|
|
|
This implements Strategie 3 from analysis_3.md:
|
|
"SEO-Agenturen suchen Domains wegen der Power (Backlinks).
|
|
Solche Domains sind für SEOs 100€ - 500€ wert, auch wenn der Name hässlich ist."
|
|
|
|
Data Sources (in priority order):
|
|
1. Moz API (if MOZ_ACCESS_ID and MOZ_SECRET_KEY are set)
|
|
2. CommonCrawl Index (free, but limited)
|
|
3. Estimation based on domain characteristics
|
|
|
|
This is a TYCOON-ONLY feature.
|
|
"""

import os
import logging
import base64
import hashlib
import hmac
import time
import httpx
from datetime import datetime, timedelta
from typing import Optional, Dict, Any, List
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.seo_data import DomainSEOData

logger = logging.getLogger(__name__)

class SEOAnalyzerService:
    """
    Analyzes domains for SEO value (backlinks, authority, etc.).

    From analysis_3.md:
    "The domain `alte-bäckerei-münchen.de` is available.
    It has links from `sueddeutsche.de` and `wikipedia.org`."
    """

    # Moz API configuration
    MOZ_API_URL = "https://lsapi.seomoz.com/v2/url_metrics"
    MOZ_LINKS_URL = "https://lsapi.seomoz.com/v2/links"

    # Cache duration (7 days for SEO data)
    CACHE_DURATION_DAYS = 7

    # Known high-authority domains for notable link detection
    NOTABLE_DOMAINS = {
        'wikipedia': ['wikipedia.org', 'wikimedia.org'],
        'gov': ['.gov', '.gov.uk', '.admin.ch', '.bund.de'],
        'edu': ['.edu', '.ac.uk', '.ethz.ch', '.uzh.ch'],
        'news': [
            'nytimes.com', 'theguardian.com', 'bbc.com', 'cnn.com',
            'forbes.com', 'bloomberg.com', 'reuters.com', 'techcrunch.com',
            'spiegel.de', 'faz.net', 'nzz.ch', 'tagesanzeiger.ch'
        ]
    }

    def __init__(self):
        self.moz_access_id = os.getenv('MOZ_ACCESS_ID')
        self.moz_secret_key = os.getenv('MOZ_SECRET_KEY')
        self.has_moz = bool(self.moz_access_id and self.moz_secret_key)

        if self.has_moz:
            logger.info("SEO Analyzer: Moz API configured")
        else:
            logger.warning("SEO Analyzer: No Moz API keys - using estimation mode")

    async def analyze_domain(
        self,
        domain: str,
        db: AsyncSession,
        force_refresh: bool = False
    ) -> Dict[str, Any]:
        """
        Analyze a domain for SEO value.

        Returns:
            Dict with SEO metrics, backlinks, and value estimate
        """
        domain = domain.lower().strip()

        try:
            # Check cache first
            if not force_refresh:
                try:
                    cached = await self._get_cached(domain, db)
                    if cached and not cached.is_expired:
                        return self._format_response(cached)
                except Exception as e:
                    # Table might not exist yet
                    logger.warning(f"Cache check failed (table may not exist): {e}")

            # Fetch fresh data
            if self.has_moz:
                seo_data = await self._fetch_moz_data(domain)
            else:
                seo_data = await self._estimate_seo_data(domain)

            # Try to save to cache (may fail if table doesn't exist)
            try:
                cached = await self._save_to_cache(domain, seo_data, db)
                return self._format_response(cached)
            except Exception as e:
                logger.warning(f"Cache save failed (table may not exist): {e}")
                # Return data directly without caching
                return self._format_dict_response(domain, seo_data)

        except Exception as e:
            logger.error(f"SEO analysis failed for {domain}: {e}")
            # Return estimated data on any error
            seo_data = await self._estimate_seo_data(domain)
            return self._format_dict_response(domain, seo_data)

    async def _get_cached(self, domain: str, db: AsyncSession) -> Optional[DomainSEOData]:
        """Get cached SEO data for a domain."""
        result = await db.execute(
            select(DomainSEOData).where(DomainSEOData.domain == domain)
        )
        return result.scalar_one_or_none()

    async def _save_to_cache(
        self,
        domain: str,
        data: Dict[str, Any],
        db: AsyncSession
    ) -> DomainSEOData:
        """Save SEO data to cache."""
        # Check if exists
        result = await db.execute(
            select(DomainSEOData).where(DomainSEOData.domain == domain)
        )
        cached = result.scalar_one_or_none()

        if cached:
            # Update existing
            for key, value in data.items():
                if hasattr(cached, key):
                    setattr(cached, key, value)
            cached.last_updated = datetime.utcnow()
            cached.expires_at = datetime.utcnow() + timedelta(days=self.CACHE_DURATION_DAYS)
            cached.fetch_count += 1
        else:
            # Create new
            cached = DomainSEOData(
                domain=domain,
                expires_at=datetime.utcnow() + timedelta(days=self.CACHE_DURATION_DAYS),
                **data
            )
            db.add(cached)

        await db.commit()
        await db.refresh(cached)
        return cached

    async def _fetch_moz_data(self, domain: str) -> Dict[str, Any]:
        """Fetch SEO data from Moz API."""
        try:
            # Generate authentication
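            # Signed-request scheme as used here: HMAC-SHA1 over "ACCESSID\nEXPIRES",
            # base64-encoded and sent as query parameters; the signature built
            # below stays valid for 300 seconds.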
            expires = int(time.time()) + 300
            string_to_sign = f"{self.moz_access_id}\n{expires}"
            signature = base64.b64encode(
                hmac.new(
                    self.moz_secret_key.encode('utf-8'),
                    string_to_sign.encode('utf-8'),
                    hashlib.sha1
                ).digest()
            ).decode('utf-8')

            auth_params = {
                'AccessID': self.moz_access_id,
                'Expires': expires,
                'Signature': signature
            }

            async with httpx.AsyncClient(timeout=30) as client:
                # Get URL metrics
                response = await client.post(
                    self.MOZ_API_URL,
                    params=auth_params,
                    json={
                        'targets': [f'http://{domain}/'],
                    }
                )

                if response.status_code == 200:
                    metrics = response.json()
                    if metrics and 'results' in metrics and metrics['results']:
                        result = metrics['results'][0]

                        # Extract notable backlinks
                        top_backlinks = await self._fetch_top_backlinks(
                            domain, auth_params, client
                        )

                        return {
                            'domain_authority': result.get('domain_authority', 0),
                            'page_authority': result.get('page_authority', 0),
                            'spam_score': result.get('spam_score', 0),
                            'total_backlinks': result.get('external_links_to_root_domain', 0),
                            'referring_domains': result.get('root_domains_to_root_domain', 0),
                            'top_backlinks': top_backlinks,
                            'notable_backlinks': self._extract_notable(top_backlinks),
                            'has_wikipedia_link': self._has_notable_link(top_backlinks, 'wikipedia'),
                            'has_gov_link': self._has_notable_link(top_backlinks, 'gov'),
                            'has_edu_link': self._has_notable_link(top_backlinks, 'edu'),
                            'has_news_link': self._has_notable_link(top_backlinks, 'news'),
                            'seo_value_estimate': self._calculate_seo_value(result),
                            'data_source': 'moz',
                        }

                logger.warning(f"Moz API returned {response.status_code} for {domain}")

        except Exception as e:
            logger.error(f"Moz API error for {domain}: {e}")

        # Fallback to estimation
        return await self._estimate_seo_data(domain)

    async def _fetch_top_backlinks(
        self,
        domain: str,
        auth_params: dict,
        client: httpx.AsyncClient
    ) -> List[Dict[str, Any]]:
        """Fetch top backlinks from Moz."""
        try:
            response = await client.post(
                self.MOZ_LINKS_URL,
                params=auth_params,
                json={
                    'target': f'http://{domain}/',
                    'target_scope': 'root_domain',
                    'filter': 'external+nofollow',
                    'sort': 'domain_authority',
                    'limit': 20
                }
            )

            if response.status_code == 200:
                data = response.json()
                if 'results' in data:
                    return [
                        {
                            'domain': link.get('source', {}).get('root_domain', ''),
                            'authority': link.get('source', {}).get('domain_authority', 0),
                            'page': link.get('source', {}).get('page', ''),
                        }
                        for link in data['results'][:10]
                    ]
        except Exception as e:
            logger.error(f"Error fetching backlinks: {e}")

        return []

    async def _estimate_seo_data(self, domain: str) -> Dict[str, Any]:
        """
        Estimate SEO data when no API is available.

        Uses heuristics based on domain characteristics.
        """
        # Extract domain parts
        parts = domain.split('.')
        name = parts[0] if parts else domain
        tld = parts[-1] if len(parts) > 1 else ''

        # Estimate domain authority based on characteristics
        estimated_da = 10  # Base

        # Short domains tend to have more backlinks
        if len(name) <= 4:
            estimated_da += 15
        elif len(name) <= 6:
            estimated_da += 10
        elif len(name) <= 8:
            estimated_da += 5

        # Premium TLDs
        premium_tlds = {'com': 10, 'org': 8, 'net': 5, 'io': 7, 'ai': 8, 'co': 6}
        estimated_da += premium_tlds.get(tld, 0)

        # Dictionary words get a boost
        common_words = ['tech', 'app', 'data', 'cloud', 'web', 'net', 'hub', 'lab', 'dev']
        if any(word in name.lower() for word in common_words):
            estimated_da += 5

        # Cap at reasonable estimate
        estimated_da = min(40, estimated_da)

        # Estimate backlinks based on DA
        estimated_backlinks = estimated_da * 50
        estimated_referring = estimated_da * 5

        return {
            'domain_authority': estimated_da,
            'page_authority': max(0, estimated_da - 5),
            'spam_score': 5,  # Assume low spam for estimates
            'total_backlinks': estimated_backlinks,
            'referring_domains': estimated_referring,
            'top_backlinks': [],
            'notable_backlinks': None,
            'has_wikipedia_link': False,
            'has_gov_link': False,
            'has_edu_link': False,
            'has_news_link': False,
            'seo_value_estimate': self._estimate_value(estimated_da),
            'data_source': 'estimated',
        }

    def _has_notable_link(self, backlinks: List[Dict], category: str) -> bool:
        """Check if backlinks contain notable sources."""
        domains_to_check = self.NOTABLE_DOMAINS.get(category, [])

        for link in backlinks:
            link_domain = link.get('domain', '').lower()
            for notable in domains_to_check:
                if notable in link_domain:
                    return True
        return False

    def _extract_notable(self, backlinks: List[Dict]) -> Optional[str]:
        """Extract notable backlink domains as comma-separated string."""
        notable = []

        for link in backlinks:
            domain = link.get('domain', '')
            authority = link.get('authority', 0)

            # Include high-authority links
            if authority >= 50:
                notable.append(domain)

        return ','.join(notable[:10]) if notable else None

    def _calculate_seo_value(self, metrics: Dict) -> float:
        """Calculate estimated SEO value in USD."""
        da = metrics.get('domain_authority', 0)
        backlinks = metrics.get('external_links_to_root_domain', 0)

        # Base value from DA
        if da >= 60:
            base_value = 500
        elif da >= 40:
            base_value = 200
        elif da >= 20:
            base_value = 50
        else:
            base_value = 10

        # Boost for backlinks
        link_boost = min(backlinks / 100, 10) * 20

        return round(base_value + link_boost, 2)

    def _estimate_value(self, da: int) -> float:
        """Estimate value based on estimated DA."""
        if da >= 40:
            return 200
        elif da >= 30:
            return 100
        elif da >= 20:
            return 50
        return 20

    def _format_response(self, data: DomainSEOData) -> Dict[str, Any]:
        """Format SEO data for API response."""
        return {
            'domain': data.domain,
            'seo_score': data.seo_score,
            'value_category': data.value_category,
            'metrics': {
                'domain_authority': data.domain_authority,
                'page_authority': data.page_authority,
                'spam_score': data.spam_score,
                'total_backlinks': data.total_backlinks,
                'referring_domains': data.referring_domains,
            },
            'notable_links': {
                'has_wikipedia': data.has_wikipedia_link,
                'has_gov': data.has_gov_link,
                'has_edu': data.has_edu_link,
                'has_news': data.has_news_link,
                'notable_domains': data.notable_backlinks.split(',') if data.notable_backlinks else [],
            },
            'top_backlinks': data.top_backlinks or [],
            'estimated_value': data.seo_value_estimate,
            'data_source': data.data_source,
            'last_updated': data.last_updated.isoformat() if data.last_updated else None,
            'is_estimated': data.data_source == 'estimated',
        }

    def _format_dict_response(self, domain: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Format SEO data from dict (when DB is not available)."""
        da = data.get('domain_authority', 0) or 0

        # Calculate SEO score
        seo_score = da
        if data.get('has_wikipedia_link'):
            seo_score = min(100, seo_score + 10)
        if data.get('has_gov_link'):
            seo_score = min(100, seo_score + 5)
        if data.get('has_edu_link'):
            seo_score = min(100, seo_score + 5)
        if data.get('has_news_link'):
            seo_score = min(100, seo_score + 3)

        # Determine value category
        if seo_score >= 60:
            value_category = "High Value"
        elif seo_score >= 40:
            value_category = "Medium Value"
        elif seo_score >= 20:
            value_category = "Low Value"
        else:
            value_category = "Minimal"

        return {
            'domain': domain,
            'seo_score': seo_score,
            'value_category': value_category,
            'metrics': {
                'domain_authority': data.get('domain_authority'),
                'page_authority': data.get('page_authority'),
                'spam_score': data.get('spam_score'),
                'total_backlinks': data.get('total_backlinks'),
                'referring_domains': data.get('referring_domains'),
            },
            'notable_links': {
                'has_wikipedia': data.get('has_wikipedia_link', False),
                'has_gov': data.get('has_gov_link', False),
                'has_edu': data.get('has_edu_link', False),
                'has_news': data.get('has_news_link', False),
                'notable_domains': data.get('notable_backlinks', '').split(',') if data.get('notable_backlinks') else [],
            },
            'top_backlinks': data.get('top_backlinks', []),
            'estimated_value': data.get('seo_value_estimate'),
            'data_source': data.get('data_source', 'estimated'),
            'last_updated': datetime.utcnow().isoformat(),
            'is_estimated': data.get('data_source') == 'estimated',
        }


# Singleton instance
seo_analyzer = SEOAnalyzerService()