perf: Batch verify drops status + bulk DB updates

This commit is contained in:
2025-12-21 16:53:30 +01:00
parent f36d55f814
commit 9d99e6ee0a

View File

@ -501,8 +501,9 @@ async def verify_drops_availability(
Returns: Returns:
dict with stats: checked, available, dropping_soon, taken, errors dict with stats: checked, available, dropping_soon, taken, errors
""" """
from sqlalchemy import update from sqlalchemy import update, bindparam, case
from app.services.drop_status_checker import check_drop_status from app.services.drop_status_checker import check_drops_batch
from app.config import get_settings
logger.info(f"Starting drops status update (max {max_checks} checks)...") logger.info(f"Starting drops status update (max {max_checks} checks)...")
@ -510,16 +511,26 @@ async def verify_drops_availability(
cutoff = datetime.utcnow() - timedelta(hours=24) cutoff = datetime.utcnow() - timedelta(hours=24)
check_cutoff = datetime.utcnow() - timedelta(hours=2) # Re-check every 2 hours check_cutoff = datetime.utcnow() - timedelta(hours=2) # Re-check every 2 hours
# Prioritization (fast + predictable):
# 1) never checked first
# 2) then oldest check first
# 3) then unknown status
# 4) then shortest domains first
unknown_first = case((DroppedDomain.availability_status == "unknown", 0), else_=1)
never_checked_first = case((DroppedDomain.last_status_check.is_(None), 0), else_=1)
query = ( query = (
select(DroppedDomain) select(DroppedDomain)
.where(DroppedDomain.dropped_date >= cutoff) .where(DroppedDomain.dropped_date >= cutoff)
.where( .where(
(DroppedDomain.last_status_check == None) | # Never checked (DroppedDomain.last_status_check.is_(None)) # Never checked
(DroppedDomain.last_status_check < check_cutoff) # Not checked recently | (DroppedDomain.last_status_check < check_cutoff) # Not checked recently
) )
.order_by( .order_by(
DroppedDomain.availability_status.desc(), # Unknown first never_checked_first.asc(),
DroppedDomain.length.asc() # Then short domains DroppedDomain.last_status_check.asc().nullsfirst(),
unknown_first.asc(),
DroppedDomain.length.asc(),
) )
.limit(max_checks) .limit(max_checks)
) )
@ -535,41 +546,59 @@ async def verify_drops_availability(
stats = {"available": 0, "dropping_soon": 0, "taken": 0, "unknown": 0} stats = {"available": 0, "dropping_soon": 0, "taken": 0, "unknown": 0}
errors = 0 errors = 0
logger.info(f"Checking {len(drops)} dropped domains...") logger.info(f"Checking {len(drops)} dropped domains (batch mode)...")
for i, drop in enumerate(drops): settings = get_settings()
full_domain = f"{drop.domain}.{drop.tld}" delay = float(getattr(settings, "domain_check_delay_seconds", 0.3) or 0.3)
try: max_concurrent = int(getattr(settings, "domain_check_max_concurrent", 3) or 3)
status_result = await check_drop_status(full_domain)
# Build (drop_id, domain) tuples for batch checker
domain_tuples: list[tuple[int, str]] = [(d.id, f"{d.domain}.{d.tld}") for d in drops]
# Process in batches to bound memory + keep DB commits reasonable
now = datetime.utcnow()
for start in range(0, len(domain_tuples), batch_size):
batch = domain_tuples[start : start + batch_size]
results = await check_drops_batch(
batch,
delay_between_requests=delay,
max_concurrent=max_concurrent,
)
# Prepare bulk updates
updates: list[dict] = []
for drop_id, status_result in results:
checked += 1 checked += 1
stats[status_result.status] = stats.get(status_result.status, 0) + 1 stats[status_result.status] = stats.get(status_result.status, 0) + 1
# Update in DB updates.append(
await db.execute( {
update(DroppedDomain) "id": drop_id,
.where(DroppedDomain.id == drop.id) "availability_status": status_result.status,
.values( "rdap_status": str(status_result.rdap_status)[:255] if status_result.rdap_status else None,
availability_status=status_result.status, "last_status_check": now,
rdap_status=str(status_result.rdap_status)[:255] if status_result.rdap_status else None, "deletion_date": status_result.deletion_date,
last_status_check=datetime.utcnow(), }
deletion_date=status_result.deletion_date,
)
) )
# Log progress every 25 domains # Bulk update using executemany
if (i + 1) % 25 == 0: stmt = (
logger.info(f"Checked {i + 1}/{len(drops)}: {stats}") update(DroppedDomain)
await db.commit() # Commit in batches .where(DroppedDomain.id == bindparam("id"))
.values(
availability_status=bindparam("availability_status"),
rdap_status=bindparam("rdap_status"),
last_status_check=bindparam("last_status_check"),
deletion_date=bindparam("deletion_date"),
)
)
await db.execute(stmt, updates)
await db.commit()
# Rate limit: 200ms between requests (5 req/sec) logger.info(f"Checked {min(start + batch_size, len(domain_tuples))}/{len(domain_tuples)}: {stats}")
await asyncio.sleep(0.2)
except Exception as e:
errors += 1
logger.warning(f"Error checking {full_domain}: {e}")
# Final commit # Final commit
await db.commit() # (already committed per batch)
logger.info( logger.info(
f"Drops status update complete: " f"Drops status update complete: "