From 9205536bf2a8e5432504448b6f2a06b5a5331be6 Mon Sep 17 00:00:00 2001
From: Yves Gugger
Date: Sun, 21 Dec 2025 15:50:59 +0100
Subject: [PATCH] perf: Reuse pooled http client for RDAP

---
 backend/app/main.py                         |   2 +
 backend/app/services/domain_checker.py      | 356 ++++++++++----------
 backend/app/services/drop_status_checker.py |  47 ++-
 backend/app/services/http_client_pool.py    |  70 ++++
 backend/run_scheduler.py                    |   2 +
 5 files changed, 269 insertions(+), 208 deletions(-)
 create mode 100644 backend/app/services/http_client_pool.py

diff --git a/backend/app/main.py b/backend/app/main.py
index 98130da..2c76b44 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -19,6 +19,7 @@ from app.config import get_settings
 from app.database import init_db
 from app.scheduler import start_scheduler, stop_scheduler
 from app.observability.metrics import instrument_app
+from app.services.http_client_pool import close_rdap_http_client
 
 # Configure logging
 logging.basicConfig(
@@ -59,6 +60,7 @@ async def lifespan(app: FastAPI):
 
     # Shutdown
     if settings.enable_scheduler:
         stop_scheduler()
+    await close_rdap_http_client()
     logger.info("Application shutdown complete")
 
diff --git a/backend/app/services/domain_checker.py b/backend/app/services/domain_checker.py
index 83f4d87..9a1afd2 100644
--- a/backend/app/services/domain_checker.py
+++ b/backend/app/services/domain_checker.py
@@ -22,6 +22,7 @@ import whodap
 import httpx
 
 from app.models.domain import DomainStatus
+from app.services.http_client_pool import get_rdap_http_client
 
 logger = logging.getLogger(__name__)
 
@@ -164,129 +165,116 @@ class DomainChecker:
         url = f"{endpoint}{domain}"
 
         try:
-            async with httpx.AsyncClient(timeout=10.0) as client:
-                response = await client.get(url, follow_redirects=True)
-
-                if response.status_code == 404:
-                    # Domain not found = available
+            client = await get_rdap_http_client()
+            response = await client.get(url, timeout=10.0)
+
+            if response.status_code == 404:
+                # Domain not found = available
+                return DomainCheckResult(
+                    domain=domain,
+                    status=DomainStatus.AVAILABLE,
+                    is_available=True,
+                    check_method="rdap_custom",
+                )
+
+            if response.status_code == 200:
+                # Domain exists in registry - but check status for pending delete
+                data = response.json()
+
+                # Check if domain is pending deletion (dropped but not yet purged)
+                domain_status = data.get("status", [])
+                pending_delete_statuses = [
+                    "pending delete",
+                    "pendingdelete",
+                    "redemption period",
+                    "redemptionperiod",
+                    "pending purge",
+                    "pendingpurge",
+                ]
+
+                is_pending_delete = any(
+                    any(pds in str(s).lower() for pds in pending_delete_statuses)
+                    for s in domain_status
+                )
+
+                if is_pending_delete:
+                    logger.info(
+                        f"{domain} is in transition/pending delete (status: {domain_status})"
+                    )
                     return DomainCheckResult(
                         domain=domain,
-                        status=DomainStatus.AVAILABLE,
-                        is_available=True,
+                        status=DomainStatus.DROPPING_SOON,  # In transition, not yet available
+                        is_available=False,  # Not yet registrable
                         check_method="rdap_custom",
+                        raw_data={"rdap_status": domain_status, "note": "pending_delete"},
                     )
-
-                if response.status_code == 200:
-                    # Domain exists in registry - but check status for pending delete
-                    data = response.json()
-
-                    # Check if domain is pending deletion (dropped but not yet purged)
-                    # These domains are effectively available for registration
-                    domain_status = data.get('status', [])
-                    pending_delete_statuses = [
-                        'pending delete',
-                        'pendingdelete',
-                        'redemption period',
-                        'redemptionperiod',
-                        'pending purge',
-                        'pendingpurge',
-                    ]
-
-                    is_pending_delete = any(
-                        any(pds in str(s).lower() for pds in pending_delete_statuses)
-                        for s in domain_status
-                    )
-
-                    if is_pending_delete:
-                        logger.info(f"{domain} is in transition/pending delete (status: {domain_status})")
-                        return DomainCheckResult(
-                            domain=domain,
-                            status=DomainStatus.DROPPING_SOON,  # In transition, not yet available
-                            is_available=False,  # Not yet registrable
-                            check_method="rdap_custom",
-                            raw_data={"rdap_status": domain_status, "note": "pending_delete"},
-                        )
-
-                    # Extract dates from events
-                    expiration_date = None
-                    creation_date = None
-                    updated_date = None
-                    registrar = None
-                    name_servers = []
-
-                    # Parse events - different registries use different event actions
-                    # SWITCH (.ch/.li): uses "expiration"
-                    # DENIC (.de): uses "last changed" but no expiration in RDAP (only WHOIS)
-                    events = data.get('events', [])
-                    for event in events:
-                        action = event.get('eventAction', '').lower()
-                        date_str = event.get('eventDate', '')
-
-                        # Expiration date - check multiple variations
-                        if not expiration_date:
-                            if any(x in action for x in ['expiration', 'expire']):
-                                expiration_date = self._parse_datetime(date_str)
-
-                        # Creation/registration date
-                        if not creation_date:
-                            if any(x in action for x in ['registration', 'created']):
-                                creation_date = self._parse_datetime(date_str)
-
-                        # Update date
-                        if any(x in action for x in ['changed', 'update', 'last changed']):
-                            updated_date = self._parse_datetime(date_str)
-
-                    # Parse nameservers
-                    nameservers = data.get('nameservers', [])
-                    for ns in nameservers:
-                        if isinstance(ns, dict):
-                            ns_name = ns.get('ldhName', '')
-                            if ns_name:
-                                name_servers.append(ns_name.lower())
-
-                    # Parse registrar from entities - check multiple roles
-                    entities = data.get('entities', [])
-                    for entity in entities:
-                        roles = entity.get('roles', [])
-                        # Look for registrar or technical contact as registrar source
-                        if any(r in roles for r in ['registrar', 'technical']):
-                            # Try vcardArray first
-                            vcard = entity.get('vcardArray', [])
-                            if isinstance(vcard, list) and len(vcard) > 1:
-                                for item in vcard[1]:
-                                    if isinstance(item, list) and len(item) > 3:
-                                        if item[0] in ('fn', 'org') and item[3]:
-                                            registrar = str(item[3])
-                                            break
-                            # Try handle as fallback
-                            if not registrar:
-                                handle = entity.get('handle', '')
-                                if handle:
-                                    registrar = handle
-                        if registrar:
-                            break
-
-                    # For .de domains: DENIC doesn't expose expiration via RDAP
-                    # We need to use WHOIS as fallback for expiration date
-                    if tld == 'de' and not expiration_date:
-                        logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
-                        # Return what we have, scheduler will update via WHOIS later
-
-                    return DomainCheckResult(
-                        domain=domain,
-                        status=DomainStatus.TAKEN,
-                        is_available=False,
-                        registrar=registrar,
-                        expiration_date=expiration_date,
-                        creation_date=creation_date,
-                        updated_date=updated_date,
-                        name_servers=name_servers if name_servers else None,
-                        check_method="rdap_custom",
-                    )
-
-                # Other status codes - try fallback
-                logger.warning(f"Custom RDAP returned {response.status_code} for {domain}")
-                return None
+
+                # Extract dates from events
+                expiration_date = None
+                creation_date = None
+                updated_date = None
+                registrar = None
+                name_servers: list[str] = []
+
+                # Parse events
+                events = data.get("events", [])
+                for event in events:
+                    action = event.get("eventAction", "").lower()
+                    date_str = event.get("eventDate", "")
+
+                    if not expiration_date and any(x in action for x in ["expiration", "expire"]):
+                        expiration_date = self._parse_datetime(date_str)
+
+                    if not creation_date and any(x in action for x in ["registration", "created"]):
+                        creation_date = self._parse_datetime(date_str)
+
+                    if any(x in action for x in ["changed", "update", "last changed"]):
+                        updated_date = self._parse_datetime(date_str)
+
+                # Parse nameservers
+                for ns in data.get("nameservers", []):
+                    if isinstance(ns, dict):
+                        ns_name = ns.get("ldhName", "")
+                        if ns_name:
+                            name_servers.append(ns_name.lower())
+
+                # Parse registrar from entities
+                for entity in data.get("entities", []):
+                    roles = entity.get("roles", [])
+                    if any(r in roles for r in ["registrar", "technical"]):
+                        vcard = entity.get("vcardArray", [])
+                        if isinstance(vcard, list) and len(vcard) > 1:
+                            for item in vcard[1]:
+                                if isinstance(item, list) and len(item) > 3:
+                                    if item[0] in ("fn", "org") and item[3]:
+                                        registrar = str(item[3])
+                                        break
+                        if not registrar:
+                            handle = entity.get("handle", "")
+                            if handle:
+                                registrar = handle
+                    if registrar:
+                        break
+
+                # For .de domains: DENIC doesn't expose expiration via RDAP
+                if tld == "de" and not expiration_date:
+                    logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
+
+                return DomainCheckResult(
+                    domain=domain,
+                    status=DomainStatus.TAKEN,
+                    is_available=False,
+                    registrar=registrar,
+                    expiration_date=expiration_date,
+                    creation_date=creation_date,
+                    updated_date=updated_date,
+                    name_servers=name_servers if name_servers else None,
+                    check_method="rdap_custom",
+                )
+
+            # Other status codes - try fallback
+            logger.warning(f"Custom RDAP returned {response.status_code} for {domain}")
+            return None
 
         except httpx.TimeoutException:
             logger.warning(f"Custom RDAP timeout for {domain}")
["registration", "created"]): + creation_date = self._parse_datetime(date_str) + + if any(x in action for x in ["changed", "update", "last changed"]): + updated_date = self._parse_datetime(date_str) + + # Parse nameservers + for ns in data.get("nameservers", []): + if isinstance(ns, dict): + ns_name = ns.get("ldhName", "") + if ns_name: + name_servers.append(ns_name.lower()) + + # Parse registrar from entities + for entity in data.get("entities", []): + roles = entity.get("roles", []) + if any(r in roles for r in ["registrar", "technical"]): + vcard = entity.get("vcardArray", []) + if isinstance(vcard, list) and len(vcard) > 1: + for item in vcard[1]: + if isinstance(item, list) and len(item) > 3: + if item[0] in ("fn", "org") and item[3]: + registrar = str(item[3]) + break + if not registrar: + handle = entity.get("handle", "") + if handle: + registrar = handle + if registrar: + break + + # For .de domains: DENIC doesn't expose expiration via RDAP + if tld == "de" and not expiration_date: + logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS") + + return DomainCheckResult( + domain=domain, + status=DomainStatus.TAKEN, + is_available=False, + registrar=registrar, + expiration_date=expiration_date, + creation_date=creation_date, + updated_date=updated_date, + name_servers=name_servers if name_servers else None, + check_method="rdap_custom", + ) + + # Other status codes - try fallback + logger.warning(f"Custom RDAP returned {response.status_code} for {domain}") + return None except httpx.TimeoutException: logger.warning(f"Custom RDAP timeout for {domain}") @@ -305,74 +293,64 @@ class DomainChecker: url = f"{self.IANA_BOOTSTRAP_URL}{domain}" try: - async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: - response = await client.get(url) - - if response.status_code == 404: - return DomainCheckResult( - domain=domain, - status=DomainStatus.AVAILABLE, - is_available=True, - check_method="rdap_iana", - ) - - if response.status_code == 429: - logger.warning(f"RDAP rate limited for {domain}") - return None - - if response.status_code != 200: - return None - - data = response.json() - - # Parse events for dates - expiration_date = None - creation_date = None - registrar = None - - for event in data.get('events', []): - action = event.get('eventAction', '').lower() - date_str = event.get('eventDate', '') - if 'expiration' in action and date_str: - expiration_date = self._parse_datetime(date_str) - elif 'registration' in action and date_str: - creation_date = self._parse_datetime(date_str) - - # Extract registrar - for entity in data.get('entities', []): - roles = entity.get('roles', []) - if 'registrar' in roles: - vcard = entity.get('vcardArray', []) - if isinstance(vcard, list) and len(vcard) > 1: - for item in vcard[1]: - if isinstance(item, list) and len(item) > 3: - if item[0] == 'fn' and item[3]: - registrar = str(item[3]) - break - - # Check status for pending delete - status_list = data.get('status', []) - status_str = ' '.join(str(s).lower() for s in status_list) - - is_dropping = any(x in status_str for x in [ - 'pending delete', 'pendingdelete', - 'redemption period', 'redemptionperiod', - ]) - - if is_dropping: - return DomainCheckResult( - domain=domain, - status=DomainStatus.DROPPING_SOON, - is_available=False, - registrar=registrar, - expiration_date=expiration_date, - creation_date=creation_date, - check_method="rdap_iana", - ) + client = await get_rdap_http_client() + response = await client.get(url, timeout=15.0) + if response.status_code == 404: 
diff --git a/backend/app/services/drop_status_checker.py b/backend/app/services/drop_status_checker.py
index 9101d9a..a3199b5 100644
--- a/backend/app/services/drop_status_checker.py
+++ b/backend/app/services/drop_status_checker.py
@@ -15,6 +15,8 @@ from dataclasses import dataclass
 from datetime import datetime
 from typing import Optional
 
+from app.services.http_client_pool import get_rdap_http_client
+
 logger = logging.getLogger(__name__)
 
 # ============================================================================
@@ -49,30 +51,26 @@ class DropStatus:
     check_method: str = "rdap"
 
 
-async def _make_rdap_request(url: str, domain: str) -> Optional[dict]:
+async def _make_rdap_request(client: httpx.AsyncClient, url: str, domain: str) -> Optional[dict]:
     """Make a single RDAP request with proper error handling."""
     try:
-        async with httpx.AsyncClient(
-            timeout=RDAP_TIMEOUT,
-            follow_redirects=True,  # Important for IANA Bootstrap
-        ) as client:
-            resp = await client.get(url)
-
-            if resp.status_code == 404:
-                # Domain not found = available
-                return {"_available": True, "_status_code": 404}
-
-            if resp.status_code == 200:
-                data = resp.json()
-                data["_status_code"] = 200
-                return data
-
-            if resp.status_code == 429:
-                logger.warning(f"RDAP rate limited for {domain}")
-                return {"_rate_limited": True, "_status_code": 429}
-
-            logger.warning(f"RDAP returned {resp.status_code} for {domain}")
-            return None
+        resp = await client.get(url, timeout=RDAP_TIMEOUT)
+
+        if resp.status_code == 404:
+            # Domain not found = available
+            return {"_available": True, "_status_code": 404}
+
+        if resp.status_code == 200:
+            data = resp.json()
+            data["_status_code"] = 200
+            return data
+
+        if resp.status_code == 429:
+            logger.warning(f"RDAP rate limited for {domain}")
+            return {"_rate_limited": True, "_status_code": 429}
+
+        logger.warning(f"RDAP returned {resp.status_code} for {domain}")
+        return None
 
     except httpx.TimeoutException:
         logger.debug(f"RDAP timeout for {domain} at {url}")
@@ -102,16 +100,17 @@ async def check_drop_status(domain: str) -> DropStatus:
     # Try preferred endpoint first
     data = None
     check_method = "rdap"
+    client = await get_rdap_http_client()
 
     if tld in PREFERRED_ENDPOINTS:
         url = f"{PREFERRED_ENDPOINTS[tld]}{domain}"
-        data = await _make_rdap_request(url, domain)
+        data = await _make_rdap_request(client, url, domain)
         check_method = f"rdap_{tld}"
 
     # Fall back to IANA Bootstrap if no data yet
     if data is None:
         url = f"{IANA_BOOTSTRAP}{domain}"
-        data = await _make_rdap_request(url, domain)
+        data = await _make_rdap_request(client, url, domain)
         check_method = "rdap_iana"
 
     # Still no data? Return unknown
diff --git a/backend/app/services/http_client_pool.py b/backend/app/services/http_client_pool.py
new file mode 100644
index 0000000..e9d9302
--- /dev/null
+++ b/backend/app/services/http_client_pool.py
@@ -0,0 +1,70 @@
+"""
+Shared HTTP clients for performance.
+
+Why:
+- Creating a new httpx.AsyncClient per request is expensive (TLS handshakes, no connection reuse).
+- For high-frequency lookups (RDAP), we keep one pooled AsyncClient per process.
+
+Notes:
+- Per-request timeouts can still be overridden in client.get(..., timeout=...).
+- Call close_* on shutdown for clean exit (optional but recommended).
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import Optional
+
+import httpx
+
+_rdap_client: Optional[httpx.AsyncClient] = None
+_rdap_client_lock = asyncio.Lock()
+
+
+def _rdap_limits() -> httpx.Limits:
+    # Conservative but effective defaults (works well for bursty traffic).
+    return httpx.Limits(max_connections=50, max_keepalive_connections=20, keepalive_expiry=30.0)
+
+
+def _rdap_timeout() -> httpx.Timeout:
+    # Overall timeout can be overridden per request.
+    return httpx.Timeout(15.0, connect=5.0)
+
+
+async def get_rdap_http_client() -> httpx.AsyncClient:
+    """
+    Get a shared httpx.AsyncClient for RDAP requests.
+    Safe for concurrent use within the same event loop.
+    """
+    global _rdap_client
+    if _rdap_client is not None and not _rdap_client.is_closed:
+        return _rdap_client
+
+    async with _rdap_client_lock:
+        if _rdap_client is not None and not _rdap_client.is_closed:
+            return _rdap_client
+
+        _rdap_client = httpx.AsyncClient(
+            timeout=_rdap_timeout(),
+            follow_redirects=True,
+            limits=_rdap_limits(),
+            headers={
+                # Be a good citizen; many registries/redirectors are sensitive.
+                "User-Agent": "pounce/1.0 (+https://pounce.ch)",
+                "Accept": "application/rdap+json, application/json",
+            },
+        )
+        return _rdap_client
+
+
+async def close_rdap_http_client() -> None:
+    """Close the shared RDAP client (best-effort)."""
+    global _rdap_client
+    if _rdap_client is None:
+        return
+    try:
+        if not _rdap_client.is_closed:
+            await _rdap_client.aclose()
+    finally:
+        _rdap_client = None
+
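Note (not part of the diff): a minimal usage sketch of the pooled client, mirroring what the checkers do. Only get_rdap_http_client comes from the new module; the endpoint URL and domain below are placeholders.

    import asyncio

    from app.services.http_client_pool import get_rdap_http_client


    async def is_available(domain: str) -> bool:
        # One process-wide client: connections and TLS sessions are reused across calls.
        client = await get_rdap_http_client()
        # Placeholder endpoint; real callers build the URL from their own endpoint map.
        response = await client.get(f"https://rdap.example/domain/{domain}", timeout=10.0)
        # An RDAP server answers 404 when the domain does not exist, i.e. it is registrable.
        return response.status_code == 404


    print(asyncio.run(is_available("example.ch")))
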
+ "User-Agent": "pounce/1.0 (+https://pounce.ch)", + "Accept": "application/rdap+json, application/json", + }, + ) + return _rdap_client + + +async def close_rdap_http_client() -> None: + """Close the shared RDAP client (best-effort).""" + global _rdap_client + if _rdap_client is None: + return + try: + if not _rdap_client.is_closed: + await _rdap_client.aclose() + finally: + _rdap_client = None + diff --git a/backend/run_scheduler.py b/backend/run_scheduler.py index 602d04f..986eb7e 100644 --- a/backend/run_scheduler.py +++ b/backend/run_scheduler.py @@ -18,6 +18,7 @@ load_dotenv() from app.config import get_settings from app.database import init_db from app.scheduler import start_scheduler, stop_scheduler +from app.services.http_client_pool import close_rdap_http_client logging.basicConfig( level=logging.INFO, @@ -54,6 +55,7 @@ async def main() -> None: await stop_event.wait() stop_scheduler() + await close_rdap_http_client() logger.info("Scheduler stopped. Bye.")