perf: Batch verify drops status + bulk DB updates
This commit is contained in:
@ -501,8 +501,9 @@ async def verify_drops_availability(
|
||||
Returns:
|
||||
dict with stats: checked, available, dropping_soon, taken, errors
|
||||
"""
|
||||
from sqlalchemy import update
|
||||
from app.services.drop_status_checker import check_drop_status
|
||||
from sqlalchemy import update, bindparam, case
|
||||
from app.services.drop_status_checker import check_drops_batch
|
||||
from app.config import get_settings
|
||||
|
||||
logger.info(f"Starting drops status update (max {max_checks} checks)...")
|
||||
|
||||
@ -510,16 +511,26 @@ async def verify_drops_availability(
|
||||
cutoff = datetime.utcnow() - timedelta(hours=24)
|
||||
check_cutoff = datetime.utcnow() - timedelta(hours=2) # Re-check every 2 hours
|
||||
|
||||
# Prioritization (fast + predictable):
|
||||
# 1) never checked first
|
||||
# 2) then oldest check first
|
||||
# 3) then unknown status
|
||||
# 4) then shortest domains first
|
||||
unknown_first = case((DroppedDomain.availability_status == "unknown", 0), else_=1)
|
||||
never_checked_first = case((DroppedDomain.last_status_check.is_(None), 0), else_=1)
|
||||
|
||||
query = (
|
||||
select(DroppedDomain)
|
||||
.where(DroppedDomain.dropped_date >= cutoff)
|
||||
.where(
|
||||
(DroppedDomain.last_status_check == None) | # Never checked
|
||||
(DroppedDomain.last_status_check < check_cutoff) # Not checked recently
|
||||
(DroppedDomain.last_status_check.is_(None)) # Never checked
|
||||
| (DroppedDomain.last_status_check < check_cutoff) # Not checked recently
|
||||
)
|
||||
.order_by(
|
||||
DroppedDomain.availability_status.desc(), # Unknown first
|
||||
DroppedDomain.length.asc() # Then short domains
|
||||
never_checked_first.asc(),
|
||||
DroppedDomain.last_status_check.asc().nullsfirst(),
|
||||
unknown_first.asc(),
|
||||
DroppedDomain.length.asc(),
|
||||
)
|
||||
.limit(max_checks)
|
||||
)
|
||||
@ -535,41 +546,59 @@ async def verify_drops_availability(
|
||||
stats = {"available": 0, "dropping_soon": 0, "taken": 0, "unknown": 0}
|
||||
errors = 0
|
||||
|
||||
logger.info(f"Checking {len(drops)} dropped domains...")
|
||||
|
||||
for i, drop in enumerate(drops):
|
||||
full_domain = f"{drop.domain}.{drop.tld}"
|
||||
try:
|
||||
status_result = await check_drop_status(full_domain)
|
||||
logger.info(f"Checking {len(drops)} dropped domains (batch mode)...")
|
||||
|
||||
settings = get_settings()
|
||||
delay = float(getattr(settings, "domain_check_delay_seconds", 0.3) or 0.3)
|
||||
max_concurrent = int(getattr(settings, "domain_check_max_concurrent", 3) or 3)
|
||||
|
||||
# Build (drop_id, domain) tuples for batch checker
|
||||
domain_tuples: list[tuple[int, str]] = [(d.id, f"{d.domain}.{d.tld}") for d in drops]
|
||||
|
||||
# Process in batches to bound memory + keep DB commits reasonable
|
||||
now = datetime.utcnow()
|
||||
for start in range(0, len(domain_tuples), batch_size):
|
||||
batch = domain_tuples[start : start + batch_size]
|
||||
results = await check_drops_batch(
|
||||
batch,
|
||||
delay_between_requests=delay,
|
||||
max_concurrent=max_concurrent,
|
||||
)
|
||||
|
||||
# Prepare bulk updates
|
||||
updates: list[dict] = []
|
||||
for drop_id, status_result in results:
|
||||
checked += 1
|
||||
stats[status_result.status] = stats.get(status_result.status, 0) + 1
|
||||
|
||||
# Update in DB
|
||||
await db.execute(
|
||||
update(DroppedDomain)
|
||||
.where(DroppedDomain.id == drop.id)
|
||||
.values(
|
||||
availability_status=status_result.status,
|
||||
rdap_status=str(status_result.rdap_status)[:255] if status_result.rdap_status else None,
|
||||
last_status_check=datetime.utcnow(),
|
||||
deletion_date=status_result.deletion_date,
|
||||
)
|
||||
|
||||
updates.append(
|
||||
{
|
||||
"id": drop_id,
|
||||
"availability_status": status_result.status,
|
||||
"rdap_status": str(status_result.rdap_status)[:255] if status_result.rdap_status else None,
|
||||
"last_status_check": now,
|
||||
"deletion_date": status_result.deletion_date,
|
||||
}
|
||||
)
|
||||
|
||||
# Log progress every 25 domains
|
||||
if (i + 1) % 25 == 0:
|
||||
logger.info(f"Checked {i + 1}/{len(drops)}: {stats}")
|
||||
await db.commit() # Commit in batches
|
||||
|
||||
# Rate limit: 200ms between requests (5 req/sec)
|
||||
await asyncio.sleep(0.2)
|
||||
|
||||
except Exception as e:
|
||||
errors += 1
|
||||
logger.warning(f"Error checking {full_domain}: {e}")
|
||||
|
||||
# Bulk update using executemany
|
||||
stmt = (
|
||||
update(DroppedDomain)
|
||||
.where(DroppedDomain.id == bindparam("id"))
|
||||
.values(
|
||||
availability_status=bindparam("availability_status"),
|
||||
rdap_status=bindparam("rdap_status"),
|
||||
last_status_check=bindparam("last_status_check"),
|
||||
deletion_date=bindparam("deletion_date"),
|
||||
)
|
||||
)
|
||||
await db.execute(stmt, updates)
|
||||
await db.commit()
|
||||
|
||||
logger.info(f"Checked {min(start + batch_size, len(domain_tuples))}/{len(domain_tuples)}: {stats}")
|
||||
|
||||
# Final commit
|
||||
await db.commit()
|
||||
# (already committed per batch)
|
||||
|
||||
logger.info(
|
||||
f"Drops status update complete: "
|
||||
|
||||
Reference in New Issue
Block a user