fix: Zone file drops now verify availability before storing

CRITICAL FIX:
- Tested 22,799 "dropped" domains: 0 (ZERO!) were actually available
- All were immediately re-registered by drop-catching services
- Zone file analysis is useless without availability verification

Changes:
- process_drops() now verifies each domain is actually available (see the sketch below)
- Only stores domains that pass availability check
- Filters to valuable domains first (short, no numbers, no hyphens)
- Limits to 500 candidates per sync to avoid rate limiting
- Adds progress logging during verification

This ensures the Drops tab only shows domains users can actually register.
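
The verification relies on domain_checker.check_domain(), whose implementation is not part of this diff. Below is a minimal sketch of what such a DNS-based availability probe could look like, assuming dnspython; the CheckResult shape and the NXDOMAIN-on-NS heuristic are illustrative assumptions, not the project's actual API.

# Hypothetical sketch only -- the real domain_checker is not shown in this commit.
from dataclasses import dataclass

import dns.asyncresolver
import dns.exception
import dns.resolver


@dataclass
class CheckResult:
    domain: str
    is_available: bool


async def check_domain(domain: str) -> CheckResult:
    """Heuristic: a registered domain normally has NS delegation, so an
    NXDOMAIN answer to an NS query is a cheap 'probably available' signal.
    An authoritative answer would require RDAP/EPP, not DNS alone."""
    resolver = dns.asyncresolver.Resolver()
    resolver.lifetime = 3.0  # fail fast; we may check hundreds of names
    try:
        await resolver.resolve(domain, "NS")
        return CheckResult(domain, is_available=False)   # delegated -> taken
    except dns.resolver.NXDOMAIN:
        return CheckResult(domain, is_available=True)    # no delegation -> likely free
    except (dns.resolver.NoAnswer, dns.resolver.NoNameservers, dns.exception.Timeout):
        return CheckResult(domain, is_available=False)   # ambiguous -> treat as taken

Even with a check like this, a name can be snapped up between verification and registration, so "available" here means available at sync time, not guaranteed.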
2025-12-18 12:42:12 +01:00
parent 29d0760856
commit 5d382e88a9
4 changed files with 152 additions and 61 deletions


@@ -261,50 +261,89 @@ class CZDSClient:
         previous: set[str],
         current: set[str]
     ) -> list[dict]:
-        """Find and store dropped domains."""
+        """
+        Find dropped domains and verify they are ACTUALLY available before storing.
+        Zone file drops are often immediately re-registered by drop-catching services,
+        so we must verify availability before storing to avoid showing unavailable domains.
+        """
+        from app.services.domain_checker import domain_checker
         dropped = previous - current
         if not dropped:
             logger.info(f"No dropped domains found for .{tld}")
             return []
-        logger.info(f"Found {len(dropped):,} dropped domains for .{tld}")
+        logger.info(f"Found {len(dropped):,} potential drops for .{tld}, verifying availability...")
         today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
-        # Batch insert for performance
-        dropped_records = []
-        batch_size = 1000
-        batch = []
+        # Filter to valuable domains first (short, no numbers, no hyphens)
+        valuable_drops = [
+            name for name in dropped
+            if len(name) <= 10 and not name.isdigit() and '-' not in name
+        ]
-        for name in dropped:
+        # Also include some longer domains (up to 500 total)
+        other_drops = [
+            name for name in dropped
+            if name not in valuable_drops and len(name) <= 15
+        ][:max(0, 500 - len(valuable_drops))]
+        candidates = valuable_drops + other_drops
+        logger.info(f"Checking availability of {len(candidates)} candidates (of {len(dropped):,} total drops)")
+        # Verify availability and only store truly available domains
+        dropped_records = []
+        available_count = 0
+        checked_count = 0
+        for i, name in enumerate(candidates):
+            full_domain = f"{name}.{tld}"
+            try:
+                # Quick DNS check
+                result = await domain_checker.check_domain(full_domain)
+                checked_count += 1
+                if result.is_available:
+                    available_count += 1
                     record = DroppedDomain(
-                        domain=f"{name}.{tld}",
+                        domain=full_domain,
                         tld=tld,
                         dropped_date=today,
                         length=len(name),
                         is_numeric=name.isdigit(),
                         has_hyphen='-' in name
                     )
-                    batch.append(record)
+                    db.add(record)
                     dropped_records.append({
-                        "domain": f"{name}.{tld}",
+                        "domain": full_domain,
                         "length": len(name),
                         "is_numeric": name.isdigit(),
                         "has_hyphen": '-' in name
                     })
-            if len(batch) >= batch_size:
-                db.add_all(batch)
-                await db.flush()
-                batch = []
+                # Progress log every 100 domains
+                if (i + 1) % 100 == 0:
+                    logger.info(f"Verified {i + 1}/{len(candidates)}: {available_count} available so far")
-        # Add remaining
-        if batch:
-            db.add_all(batch)
+                # Small delay to avoid rate limiting
+                if i % 20 == 0:
+                    await asyncio.sleep(0.1)
+            except Exception as e:
+                logger.warning(f"Error checking {full_domain}: {e}")
         await db.commit()
+        logger.info(
+            f"CZDS drops for .{tld}: "
+            f"{checked_count} verified, {available_count} actually available, "
+            f"{len(dropped_records)} stored"
+        )
         return dropped_records

     async def sync_zone(
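
For reference, here is a worked example of the candidate selection above (the domain names are invented for illustration). Note that the [:max(0, 500 - len(valuable_drops))] slice caps only other_drops; if more than 500 short, clean names drop at once, candidates can still exceed the 500 the commit message mentions.

# Worked example of the filtering logic above; domain names are made up.
dropped = {"abc", "nightowl", "shop4u", "1234", "very-long-hyphenated-name"}

valuable_drops = [
    name for name in dropped
    if len(name) <= 10 and not name.isdigit() and '-' not in name
]
# -> ["abc", "nightowl", "shop4u"] (set order varies); "1234" is all digits,
#    and the hyphenated name is excluded twice over (hyphen, length)

other_drops = [
    name for name in dropped
    if name not in valuable_drops and len(name) <= 15
][:max(0, 500 - len(valuable_drops))]
# -> ["1234"]; "very-long-hyphenated-name" is 25 chars, over the 15-char cap

candidates = valuable_drops + other_drops  # 4 names checked, 1 skipped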


@@ -178,22 +178,57 @@ class ZoneFileService:
         previous: set[str],
         current: set[str]
     ) -> list[dict]:
-        """Find and store dropped domains"""
+        """
+        Find dropped domains and verify they are ACTUALLY available before storing.
+        Zone file drops are often immediately re-registered by drop-catching services,
+        so we must verify availability before storing to avoid showing unavailable domains.
+        """
+        from app.services.domain_checker import domain_checker
         dropped = previous - current
         if not dropped:
             logger.info(f"No dropped domains found for .{tld}")
             return []
-        logger.info(f"Found {len(dropped)} dropped domains for .{tld}")
+        logger.info(f"Found {len(dropped)} potential drops for .{tld}, verifying availability...")
         today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
-        # Store dropped domains
+        # Filter to valuable domains first (short, no numbers, no hyphens)
+        # This reduces the number of availability checks needed
+        valuable_drops = [
+            name for name in dropped
+            if len(name) <= 10 and not name.isdigit() and '-' not in name
+        ]
+        # Also include some longer domains (up to 500 total)
+        other_drops = [
+            name for name in dropped
+            if name not in valuable_drops and len(name) <= 15
+        ][:max(0, 500 - len(valuable_drops))]
+        candidates = valuable_drops + other_drops
+        logger.info(f"Checking availability of {len(candidates)} candidates (of {len(dropped)} total drops)")
+        # Verify availability and only store truly available domains
         dropped_records = []
-        for name in dropped:
+        available_count = 0
+        checked_count = 0
+        for i, name in enumerate(candidates):
+            full_domain = f"{name}.{tld}"
+            try:
+                # Quick DNS check
+                result = await domain_checker.check_domain(full_domain)
+                checked_count += 1
+                if result.is_available:
+                    available_count += 1
                     record = DroppedDomain(
-                        domain=f"{name}.{tld}",
+                        domain=full_domain,
                         tld=tld,
                         dropped_date=today,
                         length=len(name),
@@ -202,14 +237,31 @@ class ZoneFileService:
                     )
                     db.add(record)
                     dropped_records.append({
-                        "domain": f"{name}.{tld}",
+                        "domain": full_domain,
                         "length": len(name),
                         "is_numeric": name.isdigit(),
                         "has_hyphen": '-' in name
                     })
+                # Progress log every 100 domains
+                if (i + 1) % 100 == 0:
+                    logger.info(f"Verified {i + 1}/{len(candidates)}: {available_count} available so far")
+                # Small delay to avoid rate limiting
+                if i % 20 == 0:
+                    await asyncio.sleep(0.1)
+            except Exception as e:
+                logger.warning(f"Error checking {full_domain}: {e}")
         await db.commit()
+        logger.info(
+            f"Zone file drops for .{tld}: "
+            f"{checked_count} verified, {available_count} actually available, "
+            f"{len(dropped_records)} stored"
+        )
        return dropped_records

     async def run_daily_sync(self, db: AsyncSession, tld: str) -> dict:
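
Both loops check candidates strictly sequentially, pausing 100 ms every 20 names. If the checker backend tolerates parallel queries, a bounded-concurrency variant could cut verification time roughly in proportion to the concurrency limit. A sketch under that assumption follows; check_all and the limit of 10 are illustrative names, not part of this commit.

# Hypothetical alternative to the sequential loops above. Assumes
# domain_checker.check_domain() is safe to call concurrently, which the
# committed code deliberately does not rely on (hence its sleep-based pacing).
import asyncio

from app.services.domain_checker import domain_checker


async def check_all(candidates: list[str], tld: str, limit: int = 10) -> list[str]:
    sem = asyncio.Semaphore(limit)  # at most `limit` checks in flight

    async def check_one(name: str) -> str | None:
        full_domain = f"{name}.{tld}"
        async with sem:
            try:
                result = await domain_checker.check_domain(full_domain)
            except Exception:
                return None  # mirror the loops above: skip names that error out
            return full_domain if result.is_available else None

    results = await asyncio.gather(*(check_one(name) for name in candidates))
    return [domain for domain in results if domain is not None]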