From 0729c2426ab1662925aa00507ea6fbd2162f8abe Mon Sep 17 00:00:00 2001 From: Yves Gugger Date: Fri, 19 Dec 2025 12:41:46 +0100 Subject: [PATCH] Deploy: 2025-12-19 12:41 --- backend/app/scheduler.py | 39 +++++++++++++-------- backend/app/services/drop_status_checker.py | 35 ++++++++++-------- 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/backend/app/scheduler.py b/backend/app/scheduler.py index 5da9588..08aa431 100644 --- a/backend/app/scheduler.py +++ b/backend/app/scheduler.py @@ -735,12 +735,12 @@ def setup_scheduler(): replace_existing=True, ) - # Drops RDAP status update (every 15 minutes - check real status) + # Drops RDAP status update (hourly - check real status with rate limiting) scheduler.add_job( update_drops_status, - CronTrigger(minute='*/15'), # Every 15 minutes + CronTrigger(minute=20), # Every hour at :20 id="drops_status_update", - name="Drops Status Update (15-min)", + name="Drops Status Update (hourly)", replace_existing=True, ) @@ -1046,9 +1046,11 @@ async def update_drops_status(): """ Update RDAP status for dropped domains. - This job runs every 30 minutes to check the real status of drops + This job runs every hour to check the real status of drops (available, dropping_soon, taken) and store it in the database. - This way users see the status instantly without needing to check manually. + Uses rate limiting (0.5s delay) to avoid 429 errors from RDAP servers. + + With 0.5s delay, 50 domains takes ~25 seconds. """ logger.info("Starting drops status update...") @@ -1059,19 +1061,24 @@ async def update_drops_status(): from datetime import datetime, timedelta async with AsyncSessionLocal() as db: - # Get drops that haven't been status-checked in the last 30 minutes + # Get drops that haven't been status-checked in the last hour # Or have never been checked - thirty_min_ago = datetime.utcnow() - timedelta(minutes=30) + one_hour_ago = datetime.utcnow() - timedelta(hours=1) + # Prioritize .ch and .li (our main focus), then short domains query = ( select(DroppedDomain) .where( (DroppedDomain.availability_status == 'unknown') | (DroppedDomain.last_status_check == None) | - (DroppedDomain.last_status_check < thirty_min_ago) + (DroppedDomain.last_status_check < one_hour_ago) ) - .order_by(DroppedDomain.length.asc()) # Short domains first - .limit(200) # Process 200 per run + .order_by( + # Prioritize .ch and .li + DroppedDomain.tld.desc(), + DroppedDomain.length.asc() + ) + .limit(50) # Only 50 per run to avoid rate limiting ) result = await db.execute(query) @@ -1081,18 +1088,19 @@ async def update_drops_status(): logger.info("All drops have been status-checked recently") return - logger.info(f"Checking status for {len(drops)} drops...") + logger.info(f"Checking status for {len(drops)} drops (with rate limiting)...") # Prepare domain list domains_to_check = [(d.id, f"{d.domain}.{d.tld}") for d in drops] - # Batch check with rate limiting - results = await batch_check_drops(domains_to_check) + # Batch check with 0.5s delay between requests + results = await batch_check_drops(domains_to_check, delay=0.5) # Update database available_count = 0 dropping_soon_count = 0 taken_count = 0 + unknown_count = 0 for drop_id, status in results: await db.execute( @@ -1112,6 +1120,8 @@ async def update_drops_status(): dropping_soon_count += 1 elif status.status == 'taken': taken_count += 1 + else: + unknown_count += 1 await db.commit() @@ -1120,7 +1130,8 @@ async def update_drops_status(): f"{len(results)} checked, " f"{available_count} available, " f"{dropping_soon_count} dropping soon, " - f"{taken_count} taken" + f"{taken_count} taken, " + f"{unknown_count} unknown" ) except Exception as e: diff --git a/backend/app/services/drop_status_checker.py b/backend/app/services/drop_status_checker.py index c505659..ba091e4 100644 --- a/backend/app/services/drop_status_checker.py +++ b/backend/app/services/drop_status_checker.py @@ -107,7 +107,11 @@ async def check_drop_status(domain: str) -> DropStatus: url = f"{endpoint}{domain}" try: - async with httpx.AsyncClient(timeout=10) as client: + headers = { + 'User-Agent': 'Mozilla/5.0 (compatible; PounceBot/1.0; +https://pounce.ch)', + 'Accept': 'application/rdap+json, application/json', + } + async with httpx.AsyncClient(timeout=10, headers=headers) as client: resp = await client.get(url) # 404 = Domain not found = AVAILABLE! @@ -212,29 +216,32 @@ async def check_drop_status(domain: str) -> DropStatus: ) -async def batch_check_drops(domains: list[tuple[int, str]]) -> list[tuple[int, DropStatus]]: +async def batch_check_drops(domains: list[tuple[int, str]], delay: float = 0.5) -> list[tuple[int, DropStatus]]: """ - Check status for multiple domains in parallel. + Check status for multiple domains with rate limiting. Args: domains: List of (id, domain_name) tuples + delay: Delay between checks in seconds (to avoid rate limiting) Returns: List of (id, DropStatus) tuples """ import asyncio - async def check_one(item: tuple[int, str]) -> tuple[int, DropStatus]: - drop_id, domain = item + results = [] + + # Process sequentially with delay to avoid rate limiting (429) + for i, (drop_id, domain) in enumerate(domains): status = await check_drop_status(domain) - return (drop_id, status) + results.append((drop_id, status)) + + # Add delay between requests to avoid rate limiting + if i < len(domains) - 1: + await asyncio.sleep(delay) + + # Log progress every 50 domains + if (i + 1) % 50 == 0: + logger.info(f"Checked {i + 1}/{len(domains)} drops...") - # Limit concurrency to avoid overwhelming RDAP servers - semaphore = asyncio.Semaphore(10) - - async def limited_check(item): - async with semaphore: - return await check_one(item) - - results = await asyncio.gather(*[limited_check(d) for d in domains]) return results