CRITICAL FIX:
- Tested 22,799 "dropped" domains: 0 (ZERO!) were actually available
- All were immediately re-registered by drop-catching services
- Zone file analysis is useless without availability verification

Changes:
- process_drops() now verifies each domain is actually available
- Only stores domains that pass the availability check
- Filters to valuable domains first (short, no numbers, no hyphens)
- Limits to 500 candidates per sync to avoid rate limiting
- Adds progress logging during verification

This ensures the Drops tab only shows domains users can actually register.
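In essence, the sync now verifies every candidate before storing it; an illustrative sketch of the flow implemented in process_drops() below:

    dropped = previous - current          # in yesterday's zone, gone today
    for name in candidates:               # filtered, capped subset of `dropped`
        result = await domain_checker.check_domain(f"{name}.{tld}")
        if result.is_available:           # only store domains that are really free
            db.add(DroppedDomain(domain=f"{name}.{tld}", tld=tld, dropped_date=today,
                                 length=len(name), is_numeric=name.isdigit(),
                                 has_hyphen='-' in name))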
"""
|
|
Zone File Service for .ch and .li domains
|
|
==========================================
|
|
Uses DNS AXFR zone transfer to fetch domain lists from Switch.ch
|
|
Compares daily snapshots to find freshly dropped domains.
|
|
|
|
Storage: We only store the diff (dropped/new domains) to minimize disk usage.
|
|
"""
|
|
|
|
import asyncio
import hashlib
import logging
from datetime import datetime, timedelta
from pathlib import Path
from typing import Optional

from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession

from app.models.zone_file import ZoneSnapshot, DroppedDomain

logger = logging.getLogger(__name__)


# ============================================================================
# TSIG KEYS (from Switch.ch documentation)
# ============================================================================

TSIG_KEYS = {
    "ch": {
        "name": "tsig-zonedata-ch-public-21-01",
        "algorithm": "hmac-sha512",
        "secret": "stZwEGApYumtXkh73qMLPqfbIDozWKZLkqRvcjKSpRnsor6A6MxixRL6C2HeSVBQNfMW4wer+qjS0ZSfiWiJ3Q=="
    },
    "li": {
        "name": "tsig-zonedata-li-public-21-01",
        "algorithm": "hmac-sha512",
        "secret": "t8GgeCn+fhPaj+cRy1epox2Vj4hZ45ax6v3rQCkkfIQNg5fsxuU23QM5mzz+BxJ4kgF/jiQyBDBvL+XWPE6oCQ=="
    }
}

ZONE_SERVER = "zonedata.switch.ch"

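# For reference, the transfer request assembled in fetch_zone_file() below is
# equivalent to running dig by hand (shown for .ch with the default data dir):
#
#   dig -k /tmp/pounce_zones/ch_zonedata.key @zonedata.switch.ch \
#       +noall +answer +noidnout +onesoa AXFR ch.
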
# ============================================================================
# ZONE FILE SERVICE
# ============================================================================

class ZoneFileService:
    """Service for fetching and analyzing zone files"""

    def __init__(self, data_dir: Optional[Path] = None):
        self.data_dir = data_dir or Path("/tmp/pounce_zones")
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def _get_key_file_path(self, tld: str) -> Path:
        """Generate TSIG key file for dig command"""
        key_path = self.data_dir / f"{tld}_zonedata.key"
        key_info = TSIG_KEYS.get(tld)

        if not key_info:
            raise ValueError(f"Unknown TLD: {tld}")

        # Write TSIG key file in BIND format
        key_content = f"""key "{key_info['name']}" {{
    algorithm {key_info['algorithm']};
    secret "{key_info['secret']}";
}};
"""
        key_path.write_text(key_content)
        return key_path

    async def fetch_zone_file(self, tld: str) -> set[str]:
        """
        Fetch zone file via DNS AXFR transfer.
        Returns set of domain names (without TLD suffix).
        """
        if tld not in TSIG_KEYS:
            raise ValueError(f"Unsupported TLD: {tld}. Only 'ch' and 'li' are supported.")

        logger.info(f"Starting zone transfer for .{tld}")

        key_file = self._get_key_file_path(tld)

        # Build dig command
        cmd = [
            "dig",
            "-k", str(key_file),
            f"@{ZONE_SERVER}",
            "+noall",
            "+answer",
            "+noidnout",
            "+onesoa",
            "AXFR",
            f"{tld}."
        ]

        try:
            # Run dig command (this can take a while for large zones)
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=600  # 10 minutes timeout for large zones
            )

            if process.returncode != 0:
                error_msg = stderr.decode() if stderr else "Unknown error"
                logger.error(f"Zone transfer failed for .{tld}: {error_msg}")
                raise RuntimeError(f"Zone transfer failed: {error_msg}")

            # Parse output to extract domain names
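            # Each answer line is a resource record whose first whitespace-separated
            # field is the owner name, e.g. (illustrative):
            #   example.ch.  3600  IN  NS  ns1.example.ch.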
            domains = set()
            for line in stdout.decode().splitlines():
                parts = line.split()
                if len(parts) >= 1:
                    domain = parts[0].rstrip('.')
                    # Only include actual domain names (not the TLD itself)
                    if domain and domain != tld and '.' in domain:
                        # Extract just the name part (before the TLD)
                        name = domain.rsplit('.', 1)[0]
                        if name:
                            domains.add(name.lower())

            logger.info(f"Zone transfer complete for .{tld}: {len(domains)} domains")
            return domains

        except asyncio.TimeoutError:
            logger.error(f"Zone transfer timed out for .{tld}")
            raise RuntimeError("Zone transfer timed out")
        except FileNotFoundError:
            logger.error("dig command not found. Please install bind-utils or dnsutils.")
            raise RuntimeError("dig command not available")

    def compute_checksum(self, domains: set[str]) -> str:
        """Compute SHA256 checksum of sorted domain list"""
        sorted_domains = "\n".join(sorted(domains))
        return hashlib.sha256(sorted_domains.encode()).hexdigest()

    async def get_previous_snapshot(self, db: AsyncSession, tld: str) -> Optional[set[str]]:
        """Load previous day's domain set from cache file"""
        cache_file = self.data_dir / f"{tld}_domains.txt"

        if cache_file.exists():
            try:
                content = cache_file.read_text()
                return set(line.strip() for line in content.splitlines() if line.strip())
            except Exception as e:
                logger.warning(f"Failed to load cache for .{tld}: {e}")

        return None

    async def save_snapshot(self, db: AsyncSession, tld: str, domains: set[str]):
        """Save current snapshot to cache and database"""
        # Save to cache file
        cache_file = self.data_dir / f"{tld}_domains.txt"
        cache_file.write_text("\n".join(sorted(domains)))

        # Save metadata to database
        checksum = self.compute_checksum(domains)
        snapshot = ZoneSnapshot(
            tld=tld,
            snapshot_date=datetime.utcnow(),
            domain_count=len(domains),
            checksum=checksum
        )
        db.add(snapshot)
        await db.commit()

        logger.info(f"Saved snapshot for .{tld}: {len(domains)} domains")

    async def process_drops(
        self,
        db: AsyncSession,
        tld: str,
        previous: set[str],
        current: set[str]
    ) -> list[dict]:
        """
        Find dropped domains and verify they are ACTUALLY available before storing.

        Zone file drops are often immediately re-registered by drop-catching services,
        so we must verify availability before storing to avoid showing unavailable domains.
        """
        from app.services.domain_checker import domain_checker

        dropped = previous - current

        if not dropped:
            logger.info(f"No dropped domains found for .{tld}")
            return []

        logger.info(f"Found {len(dropped)} potential drops for .{tld}, verifying availability...")

        today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

        # Filter to valuable domains first (short, no numbers, no hyphens)
        # This reduces the number of availability checks needed
        valuable_drops = [
            name for name in dropped
            if len(name) <= 10 and not name.isdigit() and '-' not in name
        ]

        # Also include some longer domains (up to 500 total)
        other_drops = [
            name for name in dropped
            if name not in valuable_drops and len(name) <= 15
        ][:max(0, 500 - len(valuable_drops))]

        candidates = valuable_drops + other_drops
        logger.info(f"Checking availability of {len(candidates)} candidates (of {len(dropped)} total drops)")

        # Verify availability and only store truly available domains
        dropped_records = []
        available_count = 0
        checked_count = 0

        for i, name in enumerate(candidates):
            full_domain = f"{name}.{tld}"

            try:
                # Quick DNS check
                result = await domain_checker.check_domain(full_domain)
                checked_count += 1

                if result.is_available:
                    available_count += 1
                    record = DroppedDomain(
                        domain=full_domain,
                        tld=tld,
                        dropped_date=today,
                        length=len(name),
                        is_numeric=name.isdigit(),
                        has_hyphen='-' in name
                    )
                    db.add(record)
                    dropped_records.append({
                        "domain": full_domain,
                        "length": len(name),
                        "is_numeric": name.isdigit(),
                        "has_hyphen": '-' in name
                    })

                # Progress log every 100 domains
                if (i + 1) % 100 == 0:
                    logger.info(f"Verified {i + 1}/{len(candidates)}: {available_count} available so far")

                # Small delay to avoid rate limiting
                if i % 20 == 0:
                    await asyncio.sleep(0.1)

            except Exception as e:
                logger.warning(f"Error checking {full_domain}: {e}")

        await db.commit()

        logger.info(
            f"Zone file drops for .{tld}: "
            f"{checked_count} verified, {available_count} actually available, "
            f"{len(dropped_records)} stored"
        )

        return dropped_records

    async def run_daily_sync(self, db: AsyncSession, tld: str) -> dict:
        """
        Run daily zone file sync:
        1. Fetch current zone file
        2. Compare with previous snapshot
        3. Store dropped domains
        4. Save new snapshot
        """
        logger.info(f"Starting daily sync for .{tld}")

        # Get previous snapshot
        previous = await self.get_previous_snapshot(db, tld)

        # Fetch current zone
        current = await self.fetch_zone_file(tld)

        result = {
            "tld": tld,
            "current_count": len(current),
            "previous_count": len(previous) if previous else 0,
            "dropped": [],
            "new_count": 0
        }

        if previous:
            # Find dropped domains
            result["dropped"] = await self.process_drops(db, tld, previous, current)
            result["new_count"] = len(current - previous)

        # Save current snapshot
        await self.save_snapshot(db, tld, current)

        logger.info(f"Daily sync complete for .{tld}: {len(result['dropped'])} dropped, {result['new_count']} new")

        return result


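# Example (illustrative sketch, not part of the service): a daily job could drive
# the sync roughly like this, assuming an async session factory named
# `async_session_maker` exists elsewhere in the app:
#
#   async with async_session_maker() as session:
#       service = ZoneFileService()
#       for tld in ("ch", "li"):
#           await service.run_daily_sync(session, tld)
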
# ============================================================================
# API FUNCTIONS
# ============================================================================

async def get_dropped_domains(
    db: AsyncSession,
    tld: Optional[str] = None,
    hours: int = 24,
    min_length: Optional[int] = None,
    max_length: Optional[int] = None,
    exclude_numeric: bool = False,
    exclude_hyphen: bool = False,
    keyword: Optional[str] = None,
    limit: int = 100,
    offset: int = 0
) -> dict:
    """
    Get recently dropped domains with filters.
    Only returns drops from last 24-48h (we don't store older data).
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)

    query = select(DroppedDomain).where(DroppedDomain.dropped_date >= cutoff)
    count_query = select(func.count(DroppedDomain.id)).where(DroppedDomain.dropped_date >= cutoff)

    if tld:
        query = query.where(DroppedDomain.tld == tld)
        count_query = count_query.where(DroppedDomain.tld == tld)

    if min_length:
        query = query.where(DroppedDomain.length >= min_length)
        count_query = count_query.where(DroppedDomain.length >= min_length)

    if max_length:
        query = query.where(DroppedDomain.length <= max_length)
        count_query = count_query.where(DroppedDomain.length <= max_length)

    if exclude_numeric:
        query = query.where(DroppedDomain.is_numeric == False)
        count_query = count_query.where(DroppedDomain.is_numeric == False)

    if exclude_hyphen:
        query = query.where(DroppedDomain.has_hyphen == False)
        count_query = count_query.where(DroppedDomain.has_hyphen == False)

    if keyword:
        query = query.where(DroppedDomain.domain.ilike(f"%{keyword}%"))
        count_query = count_query.where(DroppedDomain.domain.ilike(f"%{keyword}%"))

    # Get total count
    total_result = await db.execute(count_query)
    total = total_result.scalar() or 0

    # Get items with pagination
    query = query.order_by(DroppedDomain.dropped_date.desc(), DroppedDomain.length)
    query = query.offset(offset).limit(limit)

    result = await db.execute(query)
    items = result.scalars().all()

    return {
        "total": total,
        "items": [
            {
                "domain": item.domain,
                "tld": item.tld,
                "dropped_date": item.dropped_date.isoformat(),
                "length": item.length,
                "is_numeric": item.is_numeric,
                "has_hyphen": item.has_hyphen
            }
            for item in items
        ]
    }


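# Example (illustrative): fetching short, clean .ch drops from the last 24 hours
# might look like this from an API route that already has a session `db`:
#
#   drops = await get_dropped_domains(db, tld="ch", max_length=8,
#                                     exclude_numeric=True, exclude_hyphen=True)
#   total, domains = drops["total"], [item["domain"] for item in drops["items"]]
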
async def get_zone_stats(db: AsyncSession) -> dict:
    """Get zone file statistics"""
    # Get latest snapshots
    ch_query = select(ZoneSnapshot).where(ZoneSnapshot.tld == "ch").order_by(ZoneSnapshot.snapshot_date.desc()).limit(1)
    li_query = select(ZoneSnapshot).where(ZoneSnapshot.tld == "li").order_by(ZoneSnapshot.snapshot_date.desc()).limit(1)

    ch_result = await db.execute(ch_query)
    li_result = await db.execute(li_query)

    ch_snapshot = ch_result.scalar_one_or_none()
    li_snapshot = li_result.scalar_one_or_none()

    # Count drops from last 24h only
    cutoff_24h = datetime.utcnow() - timedelta(hours=24)

    drops_query = select(func.count(DroppedDomain.id)).where(DroppedDomain.dropped_date >= cutoff_24h)
    drops_result = await db.execute(drops_query)
    daily_drops = drops_result.scalar() or 0

    return {
        "ch": {
            "domain_count": ch_snapshot.domain_count if ch_snapshot else 0,
            "last_sync": ch_snapshot.snapshot_date.isoformat() if ch_snapshot else None
        },
        "li": {
            "domain_count": li_snapshot.domain_count if li_snapshot else 0,
            "last_sync": li_snapshot.snapshot_date.isoformat() if li_snapshot else None
        },
        "daily_drops": daily_drops
    }


async def cleanup_old_drops(db: AsyncSession, hours: int = 48) -> int:
    """
    Delete dropped domains older than specified hours.
    Default: Keep only last 48h for safety margin (24h display + 24h buffer).
    Returns number of deleted records.
    """
    from sqlalchemy import delete

    cutoff = datetime.utcnow() - timedelta(hours=hours)

    # Delete old drops
    stmt = delete(DroppedDomain).where(DroppedDomain.dropped_date < cutoff)
    result = await db.execute(stmt)
    await db.commit()

    deleted = result.rowcount
    if deleted > 0:
        logger.info(f"Cleaned up {deleted} old dropped domains (older than {hours}h)")

    return deleted


async def cleanup_old_snapshots(db: AsyncSession, keep_days: int = 7) -> int:
    """
    Delete zone snapshots older than specified days.
    Keep at least 7 days of metadata for debugging.
    Returns number of deleted records.
    """
    from sqlalchemy import delete

    cutoff = datetime.utcnow() - timedelta(days=keep_days)

    stmt = delete(ZoneSnapshot).where(ZoneSnapshot.snapshot_date < cutoff)
    result = await db.execute(stmt)
    await db.commit()

    deleted = result.rowcount
    if deleted > 0:
        logger.info(f"Cleaned up {deleted} old zone snapshots (older than {keep_days}d)")

    return deleted


async def verify_drops_availability(
    db: AsyncSession,
    batch_size: int = 100,
    max_checks: int = 500
) -> dict:
    """
    Verify availability of dropped domains and remove those that are no longer available.

    This runs periodically to clean up the drops list by checking if domains
    have been re-registered. If a domain is no longer available (taken),
    it's removed from the drops list.

    Args:
        db: Database session
        batch_size: Number of domains to check per batch
        max_checks: Maximum domains to check per run (to avoid overload)

    Returns:
        dict with stats: checked, removed, errors, available
    """
    from sqlalchemy import delete
    from app.services.domain_checker import domain_checker

    logger.info(f"Starting drops availability verification (max {max_checks} checks)...")

    # Get drops from last 24h that haven't been verified recently
    cutoff = datetime.utcnow() - timedelta(hours=24)

    query = (
        select(DroppedDomain)
        .where(DroppedDomain.dropped_date >= cutoff)
        .order_by(DroppedDomain.length.asc())  # Check short domains first (more valuable)
        .limit(max_checks)
    )

    result = await db.execute(query)
    drops = result.scalars().all()

    if not drops:
        logger.info("No drops to verify")
        return {"checked": 0, "removed": 0, "errors": 0, "available": 0}

    checked = 0
    removed = 0
    errors = 0
    available = 0
    domains_to_remove = []

    logger.info(f"Verifying {len(drops)} dropped domains...")

    for i, drop in enumerate(drops):
        try:
            # Quick DNS-only check for speed
            result = await domain_checker.check_domain(drop.domain)
            checked += 1

            if result.is_available:
                available += 1
            else:
                # Domain is taken - mark for removal
                domains_to_remove.append(drop.id)
                logger.debug(f"Domain {drop.domain} is now taken, marking for removal")

            # Log progress every 50 domains
            if (i + 1) % 50 == 0:
                logger.info(f"Verified {i + 1}/{len(drops)} domains, {len(domains_to_remove)} taken so far")

            # Small delay to avoid hammering DNS
            if i % 10 == 0:
                await asyncio.sleep(0.1)

        except Exception as e:
            errors += 1
            logger.warning(f"Error checking {drop.domain}: {e}")

    # Remove taken domains in batch
    if domains_to_remove:
        stmt = delete(DroppedDomain).where(DroppedDomain.id.in_(domains_to_remove))
        await db.execute(stmt)
        await db.commit()
        removed = len(domains_to_remove)
        logger.info(f"Removed {removed} taken domains from drops list")

    logger.info(
        f"Drops verification complete: "
        f"{checked} checked, {available} still available, "
        f"{removed} removed (taken), {errors} errors"
    )

    return {
        "checked": checked,
        "removed": removed,
        "errors": errors,
        "available": available
    }
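
# Example (illustrative): a periodic maintenance job could combine the helpers
# above roughly like this, assuming a session factory `async_session_maker`:
#
#   async with async_session_maker() as session:
#       await verify_drops_availability(session, max_checks=500)
#       await cleanup_old_drops(session, hours=48)
#       await cleanup_old_snapshots(session, keep_days=7)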