perf: Batch verify drops status + bulk DB updates
This commit is contained in:
@ -501,8 +501,9 @@ async def verify_drops_availability(
|
|||||||
Returns:
|
Returns:
|
||||||
dict with stats: checked, available, dropping_soon, taken, errors
|
dict with stats: checked, available, dropping_soon, taken, errors
|
||||||
"""
|
"""
|
||||||
from sqlalchemy import update
|
from sqlalchemy import update, bindparam, case
|
||||||
from app.services.drop_status_checker import check_drop_status
|
from app.services.drop_status_checker import check_drops_batch
|
||||||
|
from app.config import get_settings
|
||||||
|
|
||||||
logger.info(f"Starting drops status update (max {max_checks} checks)...")
|
logger.info(f"Starting drops status update (max {max_checks} checks)...")
|
||||||
|
|
||||||
@ -510,16 +511,26 @@ async def verify_drops_availability(
|
|||||||
cutoff = datetime.utcnow() - timedelta(hours=24)
|
cutoff = datetime.utcnow() - timedelta(hours=24)
|
||||||
check_cutoff = datetime.utcnow() - timedelta(hours=2) # Re-check every 2 hours
|
check_cutoff = datetime.utcnow() - timedelta(hours=2) # Re-check every 2 hours
|
||||||
|
|
||||||
|
# Prioritization (fast + predictable):
|
||||||
|
# 1) never checked first
|
||||||
|
# 2) then oldest check first
|
||||||
|
# 3) then unknown status
|
||||||
|
# 4) then shortest domains first
|
||||||
|
unknown_first = case((DroppedDomain.availability_status == "unknown", 0), else_=1)
|
||||||
|
never_checked_first = case((DroppedDomain.last_status_check.is_(None), 0), else_=1)
|
||||||
|
|
||||||
query = (
|
query = (
|
||||||
select(DroppedDomain)
|
select(DroppedDomain)
|
||||||
.where(DroppedDomain.dropped_date >= cutoff)
|
.where(DroppedDomain.dropped_date >= cutoff)
|
||||||
.where(
|
.where(
|
||||||
(DroppedDomain.last_status_check == None) | # Never checked
|
(DroppedDomain.last_status_check.is_(None)) # Never checked
|
||||||
(DroppedDomain.last_status_check < check_cutoff) # Not checked recently
|
| (DroppedDomain.last_status_check < check_cutoff) # Not checked recently
|
||||||
)
|
)
|
||||||
.order_by(
|
.order_by(
|
||||||
DroppedDomain.availability_status.desc(), # Unknown first
|
never_checked_first.asc(),
|
||||||
DroppedDomain.length.asc() # Then short domains
|
DroppedDomain.last_status_check.asc().nullsfirst(),
|
||||||
|
unknown_first.asc(),
|
||||||
|
DroppedDomain.length.asc(),
|
||||||
)
|
)
|
||||||
.limit(max_checks)
|
.limit(max_checks)
|
||||||
)
|
)
|
||||||
@ -535,41 +546,59 @@ async def verify_drops_availability(
|
|||||||
stats = {"available": 0, "dropping_soon": 0, "taken": 0, "unknown": 0}
|
stats = {"available": 0, "dropping_soon": 0, "taken": 0, "unknown": 0}
|
||||||
errors = 0
|
errors = 0
|
||||||
|
|
||||||
logger.info(f"Checking {len(drops)} dropped domains...")
|
logger.info(f"Checking {len(drops)} dropped domains (batch mode)...")
|
||||||
|
|
||||||
for i, drop in enumerate(drops):
|
settings = get_settings()
|
||||||
full_domain = f"{drop.domain}.{drop.tld}"
|
delay = float(getattr(settings, "domain_check_delay_seconds", 0.3) or 0.3)
|
||||||
try:
|
max_concurrent = int(getattr(settings, "domain_check_max_concurrent", 3) or 3)
|
||||||
status_result = await check_drop_status(full_domain)
|
|
||||||
|
# Build (drop_id, domain) tuples for batch checker
|
||||||
|
domain_tuples: list[tuple[int, str]] = [(d.id, f"{d.domain}.{d.tld}") for d in drops]
|
||||||
|
|
||||||
|
# Process in batches to bound memory + keep DB commits reasonable
|
||||||
|
now = datetime.utcnow()
|
||||||
|
for start in range(0, len(domain_tuples), batch_size):
|
||||||
|
batch = domain_tuples[start : start + batch_size]
|
||||||
|
results = await check_drops_batch(
|
||||||
|
batch,
|
||||||
|
delay_between_requests=delay,
|
||||||
|
max_concurrent=max_concurrent,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prepare bulk updates
|
||||||
|
updates: list[dict] = []
|
||||||
|
for drop_id, status_result in results:
|
||||||
checked += 1
|
checked += 1
|
||||||
stats[status_result.status] = stats.get(status_result.status, 0) + 1
|
stats[status_result.status] = stats.get(status_result.status, 0) + 1
|
||||||
|
|
||||||
# Update in DB
|
updates.append(
|
||||||
await db.execute(
|
{
|
||||||
update(DroppedDomain)
|
"id": drop_id,
|
||||||
.where(DroppedDomain.id == drop.id)
|
"availability_status": status_result.status,
|
||||||
.values(
|
"rdap_status": str(status_result.rdap_status)[:255] if status_result.rdap_status else None,
|
||||||
availability_status=status_result.status,
|
"last_status_check": now,
|
||||||
rdap_status=str(status_result.rdap_status)[:255] if status_result.rdap_status else None,
|
"deletion_date": status_result.deletion_date,
|
||||||
last_status_check=datetime.utcnow(),
|
}
|
||||||
deletion_date=status_result.deletion_date,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Log progress every 25 domains
|
# Bulk update using executemany
|
||||||
if (i + 1) % 25 == 0:
|
stmt = (
|
||||||
logger.info(f"Checked {i + 1}/{len(drops)}: {stats}")
|
update(DroppedDomain)
|
||||||
await db.commit() # Commit in batches
|
.where(DroppedDomain.id == bindparam("id"))
|
||||||
|
.values(
|
||||||
|
availability_status=bindparam("availability_status"),
|
||||||
|
rdap_status=bindparam("rdap_status"),
|
||||||
|
last_status_check=bindparam("last_status_check"),
|
||||||
|
deletion_date=bindparam("deletion_date"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
await db.execute(stmt, updates)
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
# Rate limit: 200ms between requests (5 req/sec)
|
logger.info(f"Checked {min(start + batch_size, len(domain_tuples))}/{len(domain_tuples)}: {stats}")
|
||||||
await asyncio.sleep(0.2)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
errors += 1
|
|
||||||
logger.warning(f"Error checking {full_domain}: {e}")
|
|
||||||
|
|
||||||
# Final commit
|
# Final commit
|
||||||
await db.commit()
|
# (already committed per batch)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Drops status update complete: "
|
f"Drops status update complete: "
|
||||||
|
|||||||
Reference in New Issue
Block a user