perf: Reuse pooled http client for RDAP

This commit is contained in:
2025-12-21 15:50:59 +01:00
parent 4ec86789cf
commit 9205536bf2
5 changed files with 269 additions and 208 deletions

View File

@ -19,6 +19,7 @@ from app.config import get_settings
from app.database import init_db from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler from app.scheduler import start_scheduler, stop_scheduler
from app.observability.metrics import instrument_app from app.observability.metrics import instrument_app
from app.services.http_client_pool import close_rdap_http_client
# Configure logging # Configure logging
logging.basicConfig( logging.basicConfig(
@ -59,6 +60,7 @@ async def lifespan(app: FastAPI):
# Shutdown # Shutdown
if settings.enable_scheduler: if settings.enable_scheduler:
stop_scheduler() stop_scheduler()
await close_rdap_http_client()
logger.info("Application shutdown complete") logger.info("Application shutdown complete")

View File

@ -22,6 +22,7 @@ import whodap
import httpx import httpx
from app.models.domain import DomainStatus from app.models.domain import DomainStatus
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -164,8 +165,8 @@ class DomainChecker:
url = f"{endpoint}{domain}" url = f"{endpoint}{domain}"
try: try:
async with httpx.AsyncClient(timeout=10.0) as client: client = await get_rdap_http_client()
response = await client.get(url, follow_redirects=True) response = await client.get(url, timeout=10.0)
if response.status_code == 404: if response.status_code == 404:
# Domain not found = available # Domain not found = available
@ -181,15 +182,14 @@ class DomainChecker:
data = response.json() data = response.json()
# Check if domain is pending deletion (dropped but not yet purged) # Check if domain is pending deletion (dropped but not yet purged)
# These domains are effectively available for registration domain_status = data.get("status", [])
domain_status = data.get('status', [])
pending_delete_statuses = [ pending_delete_statuses = [
'pending delete', "pending delete",
'pendingdelete', "pendingdelete",
'redemption period', "redemption period",
'redemptionperiod', "redemptionperiod",
'pending purge', "pending purge",
'pendingpurge', "pendingpurge",
] ]
is_pending_delete = any( is_pending_delete = any(
@ -198,7 +198,9 @@ class DomainChecker:
) )
if is_pending_delete: if is_pending_delete:
logger.info(f"{domain} is in transition/pending delete (status: {domain_status})") logger.info(
f"{domain} is in transition/pending delete (status: {domain_status})"
)
return DomainCheckResult( return DomainCheckResult(
domain=domain, domain=domain,
status=DomainStatus.DROPPING_SOON, # In transition, not yet available status=DomainStatus.DROPPING_SOON, # In transition, not yet available
@ -212,65 +214,51 @@ class DomainChecker:
creation_date = None creation_date = None
updated_date = None updated_date = None
registrar = None registrar = None
name_servers = [] name_servers: list[str] = []
# Parse events - different registries use different event actions # Parse events
# SWITCH (.ch/.li): uses "expiration" events = data.get("events", [])
# DENIC (.de): uses "last changed" but no expiration in RDAP (only WHOIS)
events = data.get('events', [])
for event in events: for event in events:
action = event.get('eventAction', '').lower() action = event.get("eventAction", "").lower()
date_str = event.get('eventDate', '') date_str = event.get("eventDate", "")
# Expiration date - check multiple variations if not expiration_date and any(x in action for x in ["expiration", "expire"]):
if not expiration_date:
if any(x in action for x in ['expiration', 'expire']):
expiration_date = self._parse_datetime(date_str) expiration_date = self._parse_datetime(date_str)
# Creation/registration date if not creation_date and any(x in action for x in ["registration", "created"]):
if not creation_date:
if any(x in action for x in ['registration', 'created']):
creation_date = self._parse_datetime(date_str) creation_date = self._parse_datetime(date_str)
# Update date if any(x in action for x in ["changed", "update", "last changed"]):
if any(x in action for x in ['changed', 'update', 'last changed']):
updated_date = self._parse_datetime(date_str) updated_date = self._parse_datetime(date_str)
# Parse nameservers # Parse nameservers
nameservers = data.get('nameservers', []) for ns in data.get("nameservers", []):
for ns in nameservers:
if isinstance(ns, dict): if isinstance(ns, dict):
ns_name = ns.get('ldhName', '') ns_name = ns.get("ldhName", "")
if ns_name: if ns_name:
name_servers.append(ns_name.lower()) name_servers.append(ns_name.lower())
# Parse registrar from entities - check multiple roles # Parse registrar from entities
entities = data.get('entities', []) for entity in data.get("entities", []):
for entity in entities: roles = entity.get("roles", [])
roles = entity.get('roles', []) if any(r in roles for r in ["registrar", "technical"]):
# Look for registrar or technical contact as registrar source vcard = entity.get("vcardArray", [])
if any(r in roles for r in ['registrar', 'technical']):
# Try vcardArray first
vcard = entity.get('vcardArray', [])
if isinstance(vcard, list) and len(vcard) > 1: if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]: for item in vcard[1]:
if isinstance(item, list) and len(item) > 3: if isinstance(item, list) and len(item) > 3:
if item[0] in ('fn', 'org') and item[3]: if item[0] in ("fn", "org") and item[3]:
registrar = str(item[3]) registrar = str(item[3])
break break
# Try handle as fallback
if not registrar: if not registrar:
handle = entity.get('handle', '') handle = entity.get("handle", "")
if handle: if handle:
registrar = handle registrar = handle
if registrar: if registrar:
break break
# For .de domains: DENIC doesn't expose expiration via RDAP # For .de domains: DENIC doesn't expose expiration via RDAP
# We need to use WHOIS as fallback for expiration date if tld == "de" and not expiration_date:
if tld == 'de' and not expiration_date:
logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS") logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
# Return what we have, scheduler will update via WHOIS later
return DomainCheckResult( return DomainCheckResult(
domain=domain, domain=domain,
@ -305,8 +293,8 @@ class DomainChecker:
url = f"{self.IANA_BOOTSTRAP_URL}{domain}" url = f"{self.IANA_BOOTSTRAP_URL}{domain}"
try: try:
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: client = await get_rdap_http_client()
response = await client.get(url) response = await client.get(url, timeout=15.0)
if response.status_code == 404: if response.status_code == 404:
return DomainCheckResult( return DomainCheckResult(

View File

@ -15,6 +15,8 @@ from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import Optional from typing import Optional
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ============================================================================ # ============================================================================
@ -49,14 +51,10 @@ class DropStatus:
check_method: str = "rdap" check_method: str = "rdap"
async def _make_rdap_request(url: str, domain: str) -> Optional[dict]: async def _make_rdap_request(client: httpx.AsyncClient, url: str, domain: str) -> Optional[dict]:
"""Make a single RDAP request with proper error handling.""" """Make a single RDAP request with proper error handling."""
try: try:
async with httpx.AsyncClient( resp = await client.get(url, timeout=RDAP_TIMEOUT)
timeout=RDAP_TIMEOUT,
follow_redirects=True, # Important for IANA Bootstrap
) as client:
resp = await client.get(url)
if resp.status_code == 404: if resp.status_code == 404:
# Domain not found = available # Domain not found = available
@ -102,16 +100,17 @@ async def check_drop_status(domain: str) -> DropStatus:
# Try preferred endpoint first # Try preferred endpoint first
data = None data = None
check_method = "rdap" check_method = "rdap"
client = await get_rdap_http_client()
if tld in PREFERRED_ENDPOINTS: if tld in PREFERRED_ENDPOINTS:
url = f"{PREFERRED_ENDPOINTS[tld]}{domain}" url = f"{PREFERRED_ENDPOINTS[tld]}{domain}"
data = await _make_rdap_request(url, domain) data = await _make_rdap_request(client, url, domain)
check_method = f"rdap_{tld}" check_method = f"rdap_{tld}"
# Fall back to IANA Bootstrap if no data yet # Fall back to IANA Bootstrap if no data yet
if data is None: if data is None:
url = f"{IANA_BOOTSTRAP}{domain}" url = f"{IANA_BOOTSTRAP}{domain}"
data = await _make_rdap_request(url, domain) data = await _make_rdap_request(client, url, domain)
check_method = "rdap_iana" check_method = "rdap_iana"
# Still no data? Return unknown # Still no data? Return unknown

View File

@ -0,0 +1,70 @@
"""
Shared HTTP clients for performance.
Why:
- Creating a new httpx.AsyncClient per request is expensive (TLS handshakes, no connection reuse).
- For high-frequency lookups (RDAP), we keep one pooled AsyncClient per process.
Notes:
- Per-request timeouts can still be overridden in client.get(..., timeout=...).
- Call close_* on shutdown for clean exit (optional but recommended).
"""
from __future__ import annotations
import asyncio
from typing import Optional
import httpx
_rdap_client: Optional[httpx.AsyncClient] = None
_rdap_client_lock = asyncio.Lock()
def _rdap_limits() -> httpx.Limits:
    """Return the connection-pool limits for the shared RDAP client.

    Conservative but effective defaults (works well for bursty traffic).
    """
    return httpx.Limits(
        max_connections=50,
        max_keepalive_connections=20,
        keepalive_expiry=30.0,
    )
def _rdap_timeout() -> httpx.Timeout:
    """Return the default timeout for the shared RDAP client.

    15s overall budget with a 5s connect phase; individual requests may
    still override this via client.get(..., timeout=...).
    """
    return httpx.Timeout(15.0, connect=5.0)
async def get_rdap_http_client() -> httpx.AsyncClient:
    """
    Return the process-wide httpx.AsyncClient used for RDAP lookups.

    Double-checked locking: a lock-free fast path reuses the live client,
    and the lock only guards creation so concurrent coroutines on the same
    event loop never build more than one client.  A client that was closed
    (e.g. via close_rdap_http_client) is transparently replaced.

    NOTE(review): _rdap_client_lock is created at import time; assumes all
    callers share one event loop per process — confirm if tests spin up
    multiple loops.
    """
    global _rdap_client

    # Fast path: no lock needed when a live client already exists.
    client = _rdap_client
    if client is not None and not client.is_closed:
        return client

    async with _rdap_client_lock:
        # Re-check under the lock: another coroutine may have won the race.
        client = _rdap_client
        if client is None or client.is_closed:
            client = httpx.AsyncClient(
                timeout=_rdap_timeout(),
                follow_redirects=True,
                limits=_rdap_limits(),
                headers={
                    # Be a good citizen; many registries/redirectors are sensitive.
                    "User-Agent": "pounce/1.0 (+https://pounce.ch)",
                    "Accept": "application/rdap+json, application/json",
                },
            )
            _rdap_client = client
        return client
async def close_rdap_http_client() -> None:
    """Close the shared RDAP client (best-effort).

    Safe to call when no client exists or when it is already closed; the
    module-level reference is cleared even if aclose() raises.
    """
    global _rdap_client

    client = _rdap_client
    if client is None:
        return
    try:
        if not client.is_closed:
            await client.aclose()
    finally:
        # Drop the reference unconditionally so a later get_* call
        # builds a fresh client.
        _rdap_client = None

View File

@ -18,6 +18,7 @@ load_dotenv()
from app.config import get_settings from app.config import get_settings
from app.database import init_db from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler from app.scheduler import start_scheduler, stop_scheduler
from app.services.http_client_pool import close_rdap_http_client
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
@ -54,6 +55,7 @@ async def main() -> None:
await stop_event.wait() await stop_event.wait()
stop_scheduler() stop_scheduler()
await close_rdap_http_client()
logger.info("Scheduler stopped. Bye.") logger.info("Scheduler stopped. Bye.")