"""
|
|
Zone File Service for .ch and .li domains
|
|
==========================================
|
|
Uses DNS AXFR zone transfer to fetch domain lists from Switch.ch
|
|
Compares daily snapshots to find freshly dropped domains.
|
|
|
|
Storage: We only store the diff (dropped/new domains) to minimize disk usage.
|
|
"""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from sqlalchemy import select, func
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.models.zone_file import ZoneSnapshot, DroppedDomain
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ============================================================================
|
|
# TSIG KEYS (from Switch.ch documentation)
|
|
# ============================================================================
|
|
|
|
TSIG_KEYS = {
|
|
"ch": {
|
|
"name": "tsig-zonedata-ch-public-21-01",
|
|
"algorithm": "hmac-sha512",
|
|
"secret": "stZwEGApYumtXkh73qMLPqfbIDozWKZLkqRvcjKSpRnsor6A6MxixRL6C2HeSVBQNfMW4wer+qjS0ZSfiWiJ3Q=="
|
|
},
|
|
"li": {
|
|
"name": "tsig-zonedata-li-public-21-01",
|
|
"algorithm": "hmac-sha512",
|
|
"secret": "t8GgeCn+fhPaj+cRy1epox2Vj4hZ45ax6v3rQCkkfIQNg5fsxuU23QM5mzz+BxJ4kgF/jiQyBDBvL+XWPE6oCQ=="
|
|
}
|
|
}
|
|
|
|
ZONE_SERVER = "zonedata.switch.ch"
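
# The dig invocation assembled in ZoneFileService.fetch_zone_file() below is
# roughly equivalent to running the Switch.ch transfer by hand (the key file
# path shown is illustrative; it is generated under data_dir at runtime):
#
#   dig -k /tmp/pounce_zones/ch_zonedata.key @zonedata.switch.ch \
#       +noall +answer +noidnout +onesoa AXFR ch.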

# ============================================================================
# ZONE FILE SERVICE
# ============================================================================

class ZoneFileService:
    """Service for fetching and analyzing zone files"""

    def __init__(self, data_dir: Optional[Path] = None):
        self.data_dir = data_dir or Path("/tmp/pounce_zones")
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def _get_key_file_path(self, tld: str) -> Path:
        """Generate TSIG key file for dig command"""
        key_path = self.data_dir / f"{tld}_zonedata.key"
        key_info = TSIG_KEYS.get(tld)

        if not key_info:
            raise ValueError(f"Unknown TLD: {tld}")

        # Write TSIG key file in BIND format
        key_content = f"""key "{key_info['name']}" {{
    algorithm {key_info['algorithm']};
    secret "{key_info['secret']}";
}};
"""
        key_path.write_text(key_content)
        return key_path

    async def fetch_zone_file(self, tld: str) -> set[str]:
        """
        Fetch zone file via DNS AXFR transfer.
        Returns set of domain names (without TLD suffix).
        """
        if tld not in TSIG_KEYS:
            raise ValueError(f"Unsupported TLD: {tld}. Only 'ch' and 'li' are supported.")

        logger.info(f"Starting zone transfer for .{tld}")

        key_file = self._get_key_file_path(tld)

        # Build dig command
        cmd = [
            "dig",
            "-k", str(key_file),
            f"@{ZONE_SERVER}",
            "+noall",
            "+answer",
            "+noidnout",
            "+onesoa",
            "AXFR",
            f"{tld}."
        ]

        try:
            # Run dig command (this can take a while for large zones)
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=600  # 10 minutes timeout for large zones
            )

            if process.returncode != 0:
                error_msg = stderr.decode() if stderr else "Unknown error"
                logger.error(f"Zone transfer failed for .{tld}: {error_msg}")
                raise RuntimeError(f"Zone transfer failed: {error_msg}")

            # Parse output to extract domain names
            domains = set()
            for line in stdout.decode().splitlines():
                parts = line.split()
                if len(parts) >= 1:
                    domain = parts[0].rstrip('.')
                    # Only include actual domain names (not the TLD itself)
                    if domain and domain != tld and '.' in domain:
                        # Extract just the name part (before the TLD)
                        name = domain.rsplit('.', 1)[0]
                        if name:
                            domains.add(name.lower())

            logger.info(f"Zone transfer complete for .{tld}: {len(domains)} domains")
            return domains

        except asyncio.TimeoutError:
            logger.error(f"Zone transfer timed out for .{tld}")
            raise RuntimeError("Zone transfer timed out")
        except FileNotFoundError:
            logger.error("dig command not found. Please install bind-utils or dnsutils.")
            raise RuntimeError("dig command not available")

    def compute_checksum(self, domains: set[str]) -> str:
        """Compute SHA256 checksum of sorted domain list"""
        sorted_domains = "\n".join(sorted(domains))
        return hashlib.sha256(sorted_domains.encode()).hexdigest()
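        # Example (illustrative): compute_checksum({"b", "a"}) equals
        # hashlib.sha256(b"a\nb").hexdigest(), since the set is sorted before hashing.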

    async def get_previous_snapshot(self, db: AsyncSession, tld: str) -> Optional[set[str]]:
        """Load previous day's domain set from cache file"""
        cache_file = self.data_dir / f"{tld}_domains.txt"

        if cache_file.exists():
            try:
                content = cache_file.read_text()
                return set(line.strip() for line in content.splitlines() if line.strip())
            except Exception as e:
                logger.warning(f"Failed to load cache for .{tld}: {e}")

        return None

    async def save_snapshot(self, db: AsyncSession, tld: str, domains: set[str]):
        """Save current snapshot to cache and database"""
        # Save to cache file
        cache_file = self.data_dir / f"{tld}_domains.txt"
        cache_file.write_text("\n".join(sorted(domains)))

        # Save metadata to database
        checksum = self.compute_checksum(domains)
        snapshot = ZoneSnapshot(
            tld=tld,
            snapshot_date=datetime.utcnow(),
            domain_count=len(domains),
            checksum=checksum
        )
        db.add(snapshot)
        await db.commit()

        logger.info(f"Saved snapshot for .{tld}: {len(domains)} domains")

    async def process_drops(
        self,
        db: AsyncSession,
        tld: str,
        previous: set[str],
        current: set[str]
    ) -> list[dict]:
        """Find and store dropped domains"""
        dropped = previous - current

        if not dropped:
            logger.info(f"No dropped domains found for .{tld}")
            return []

        logger.info(f"Found {len(dropped)} dropped domains for .{tld}")

        today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

        # Store dropped domains
        dropped_records = []
        for name in dropped:
            record = DroppedDomain(
                domain=f"{name}.{tld}",
                tld=tld,
                dropped_date=today,
                length=len(name),
                is_numeric=name.isdigit(),
                has_hyphen='-' in name
            )
            db.add(record)
            dropped_records.append({
                "domain": f"{name}.{tld}",
                "length": len(name),
                "is_numeric": name.isdigit(),
                "has_hyphen": '-' in name
            })

        await db.commit()

        return dropped_records

    async def run_daily_sync(self, db: AsyncSession, tld: str) -> dict:
        """
        Run daily zone file sync:
        1. Fetch current zone file
        2. Compare with previous snapshot
        3. Store dropped domains
        4. Save new snapshot
        """
        logger.info(f"Starting daily sync for .{tld}")

        # Get previous snapshot
        previous = await self.get_previous_snapshot(db, tld)

        # Fetch current zone
        current = await self.fetch_zone_file(tld)

        result = {
            "tld": tld,
            "current_count": len(current),
            "previous_count": len(previous) if previous else 0,
            "dropped": [],
            "new_count": 0
        }

        if previous:
            # Find dropped domains
            result["dropped"] = await self.process_drops(db, tld, previous, current)
            result["new_count"] = len(current - previous)

        # Save current snapshot
        await self.save_snapshot(db, tld, current)

        logger.info(f"Daily sync complete for .{tld}: {len(result['dropped'])} dropped, {result['new_count']} new")

        return result


# ============================================================================
# API FUNCTIONS
# ============================================================================

async def get_dropped_domains(
    db: AsyncSession,
    tld: Optional[str] = None,
    hours: int = 24,
    min_length: Optional[int] = None,
    max_length: Optional[int] = None,
    exclude_numeric: bool = False,
    exclude_hyphen: bool = False,
    keyword: Optional[str] = None,
    limit: int = 100,
    offset: int = 0
) -> dict:
    """
    Get recently dropped domains with filters.
    Only returns drops from last 24-48h (we don't store older data).
    """
    cutoff = datetime.utcnow() - timedelta(hours=hours)

    query = select(DroppedDomain).where(DroppedDomain.dropped_date >= cutoff)
    count_query = select(func.count(DroppedDomain.id)).where(DroppedDomain.dropped_date >= cutoff)

    if tld:
        query = query.where(DroppedDomain.tld == tld)
        count_query = count_query.where(DroppedDomain.tld == tld)

    if min_length:
        query = query.where(DroppedDomain.length >= min_length)
        count_query = count_query.where(DroppedDomain.length >= min_length)

    if max_length:
        query = query.where(DroppedDomain.length <= max_length)
        count_query = count_query.where(DroppedDomain.length <= max_length)

    if exclude_numeric:
        query = query.where(DroppedDomain.is_numeric == False)
        count_query = count_query.where(DroppedDomain.is_numeric == False)

    if exclude_hyphen:
        query = query.where(DroppedDomain.has_hyphen == False)
        count_query = count_query.where(DroppedDomain.has_hyphen == False)

    if keyword:
        query = query.where(DroppedDomain.domain.ilike(f"%{keyword}%"))
        count_query = count_query.where(DroppedDomain.domain.ilike(f"%{keyword}%"))

    # Get total count
    total_result = await db.execute(count_query)
    total = total_result.scalar() or 0

    # Get items with pagination
    query = query.order_by(DroppedDomain.dropped_date.desc(), DroppedDomain.length)
    query = query.offset(offset).limit(limit)

    result = await db.execute(query)
    items = result.scalars().all()

    return {
        "total": total,
        "items": [
            {
                "domain": item.domain,
                "tld": item.tld,
                "dropped_date": item.dropped_date.isoformat(),
                "length": item.length,
                "is_numeric": item.is_numeric,
                "has_hyphen": item.has_hyphen
            }
            for item in items
        ]
    }
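
# Example call (illustrative): short, non-numeric .ch drops from the last 24h:
#   result = await get_dropped_domains(db, tld="ch", max_length=6, exclude_numeric=True)
#   result["total"], [d["domain"] for d in result["items"]]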


async def get_zone_stats(db: AsyncSession) -> dict:
    """Get zone file statistics"""
    # Get latest snapshots
    ch_query = select(ZoneSnapshot).where(ZoneSnapshot.tld == "ch").order_by(ZoneSnapshot.snapshot_date.desc()).limit(1)
    li_query = select(ZoneSnapshot).where(ZoneSnapshot.tld == "li").order_by(ZoneSnapshot.snapshot_date.desc()).limit(1)

    ch_result = await db.execute(ch_query)
    li_result = await db.execute(li_query)

    ch_snapshot = ch_result.scalar_one_or_none()
    li_snapshot = li_result.scalar_one_or_none()

    # Count drops from last 24h only
    cutoff_24h = datetime.utcnow() - timedelta(hours=24)

    drops_query = select(func.count(DroppedDomain.id)).where(DroppedDomain.dropped_date >= cutoff_24h)
    drops_result = await db.execute(drops_query)
    daily_drops = drops_result.scalar() or 0

    return {
        "ch": {
            "domain_count": ch_snapshot.domain_count if ch_snapshot else 0,
            "last_sync": ch_snapshot.snapshot_date.isoformat() if ch_snapshot else None
        },
        "li": {
            "domain_count": li_snapshot.domain_count if li_snapshot else 0,
            "last_sync": li_snapshot.snapshot_date.isoformat() if li_snapshot else None
        },
        "daily_drops": daily_drops
    }


async def cleanup_old_drops(db: AsyncSession, hours: int = 48) -> int:
    """
    Delete dropped domains older than specified hours.
    Default: Keep only last 48h for safety margin (24h display + 24h buffer).
    Returns number of deleted records.
    """
    from sqlalchemy import delete

    cutoff = datetime.utcnow() - timedelta(hours=hours)

    # Delete old drops
    stmt = delete(DroppedDomain).where(DroppedDomain.dropped_date < cutoff)
    result = await db.execute(stmt)
    await db.commit()

    deleted = result.rowcount
    if deleted > 0:
        logger.info(f"Cleaned up {deleted} old dropped domains (older than {hours}h)")

    return deleted


async def cleanup_old_snapshots(db: AsyncSession, keep_days: int = 7) -> int:
    """
    Delete zone snapshots older than specified days.
    Keep at least 7 days of metadata for debugging.
    Returns number of deleted records.
    """
    from sqlalchemy import delete

    cutoff = datetime.utcnow() - timedelta(days=keep_days)

    stmt = delete(ZoneSnapshot).where(ZoneSnapshot.snapshot_date < cutoff)
    result = await db.execute(stmt)
    await db.commit()

    deleted = result.rowcount
    if deleted > 0:
        logger.info(f"Cleaned up {deleted} old zone snapshots (older than {keep_days}d)")

    return deleted
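

# ============================================================================
# USAGE SKETCH (illustrative, not imported by the application)
# ============================================================================
# A minimal way to wire the daily sync and cleanup together in a one-off
# script. `async_session_factory` is a hypothetical async sessionmaker from
# the application's database module; swap in the real one before running.
#
# async def _demo():
#     service = ZoneFileService()
#     async with async_session_factory() as db:
#         for tld in ("ch", "li"):
#             summary = await service.run_daily_sync(db, tld)
#             print(summary["tld"], summary["current_count"], summary["new_count"])
#         await cleanup_old_drops(db, hours=48)
#         await cleanup_old_snapshots(db, keep_days=7)
#
# if __name__ == "__main__":
#     asyncio.run(_demo())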