perf: Reuse pooled http client for RDAP

This commit is contained in:
2025-12-21 15:50:59 +01:00
parent 4ec86789cf
commit 9205536bf2
5 changed files with 269 additions and 208 deletions

View File

@ -19,6 +19,7 @@ from app.config import get_settings
from app.database import init_db from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler from app.scheduler import start_scheduler, stop_scheduler
from app.observability.metrics import instrument_app from app.observability.metrics import instrument_app
from app.services.http_client_pool import close_rdap_http_client
# Configure logging # Configure logging
logging.basicConfig( logging.basicConfig(
@ -59,6 +60,7 @@ async def lifespan(app: FastAPI):
# Shutdown # Shutdown
if settings.enable_scheduler: if settings.enable_scheduler:
stop_scheduler() stop_scheduler()
await close_rdap_http_client()
logger.info("Application shutdown complete") logger.info("Application shutdown complete")

View File

@ -22,6 +22,7 @@ import whodap
import httpx import httpx
from app.models.domain import DomainStatus from app.models.domain import DomainStatus
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -164,129 +165,116 @@ class DomainChecker:
url = f"{endpoint}{domain}" url = f"{endpoint}{domain}"
try: try:
async with httpx.AsyncClient(timeout=10.0) as client: client = await get_rdap_http_client()
response = await client.get(url, follow_redirects=True) response = await client.get(url, timeout=10.0)
if response.status_code == 404: if response.status_code == 404:
# Domain not found = available # Domain not found = available
return DomainCheckResult(
domain=domain,
status=DomainStatus.AVAILABLE,
is_available=True,
check_method="rdap_custom",
)
if response.status_code == 200:
# Domain exists in registry - but check status for pending delete
data = response.json()
# Check if domain is pending deletion (dropped but not yet purged)
domain_status = data.get("status", [])
pending_delete_statuses = [
"pending delete",
"pendingdelete",
"redemption period",
"redemptionperiod",
"pending purge",
"pendingpurge",
]
is_pending_delete = any(
any(pds in str(s).lower() for pds in pending_delete_statuses)
for s in domain_status
)
if is_pending_delete:
logger.info(
f"{domain} is in transition/pending delete (status: {domain_status})"
)
return DomainCheckResult( return DomainCheckResult(
domain=domain, domain=domain,
status=DomainStatus.AVAILABLE, status=DomainStatus.DROPPING_SOON, # In transition, not yet available
is_available=True, is_available=False, # Not yet registrable
check_method="rdap_custom", check_method="rdap_custom",
raw_data={"rdap_status": domain_status, "note": "pending_delete"},
) )
if response.status_code == 200: # Extract dates from events
# Domain exists in registry - but check status for pending delete expiration_date = None
data = response.json() creation_date = None
updated_date = None
registrar = None
name_servers: list[str] = []
# Check if domain is pending deletion (dropped but not yet purged) # Parse events
# These domains are effectively available for registration events = data.get("events", [])
domain_status = data.get('status', []) for event in events:
pending_delete_statuses = [ action = event.get("eventAction", "").lower()
'pending delete', date_str = event.get("eventDate", "")
'pendingdelete',
'redemption period',
'redemptionperiod',
'pending purge',
'pendingpurge',
]
is_pending_delete = any( if not expiration_date and any(x in action for x in ["expiration", "expire"]):
any(pds in str(s).lower() for pds in pending_delete_statuses) expiration_date = self._parse_datetime(date_str)
for s in domain_status
)
if is_pending_delete: if not creation_date and any(x in action for x in ["registration", "created"]):
logger.info(f"{domain} is in transition/pending delete (status: {domain_status})") creation_date = self._parse_datetime(date_str)
return DomainCheckResult(
domain=domain,
status=DomainStatus.DROPPING_SOON, # In transition, not yet available
is_available=False, # Not yet registrable
check_method="rdap_custom",
raw_data={"rdap_status": domain_status, "note": "pending_delete"},
)
# Extract dates from events if any(x in action for x in ["changed", "update", "last changed"]):
expiration_date = None updated_date = self._parse_datetime(date_str)
creation_date = None
updated_date = None
registrar = None
name_servers = []
# Parse events - different registries use different event actions # Parse nameservers
# SWITCH (.ch/.li): uses "expiration" for ns in data.get("nameservers", []):
# DENIC (.de): uses "last changed" but no expiration in RDAP (only WHOIS) if isinstance(ns, dict):
events = data.get('events', []) ns_name = ns.get("ldhName", "")
for event in events: if ns_name:
action = event.get('eventAction', '').lower() name_servers.append(ns_name.lower())
date_str = event.get('eventDate', '')
# Expiration date - check multiple variations # Parse registrar from entities
if not expiration_date: for entity in data.get("entities", []):
if any(x in action for x in ['expiration', 'expire']): roles = entity.get("roles", [])
expiration_date = self._parse_datetime(date_str) if any(r in roles for r in ["registrar", "technical"]):
vcard = entity.get("vcardArray", [])
if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]:
if isinstance(item, list) and len(item) > 3:
if item[0] in ("fn", "org") and item[3]:
registrar = str(item[3])
break
if not registrar:
handle = entity.get("handle", "")
if handle:
registrar = handle
if registrar:
break
# Creation/registration date # For .de domains: DENIC doesn't expose expiration via RDAP
if not creation_date: if tld == "de" and not expiration_date:
if any(x in action for x in ['registration', 'created']): logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
creation_date = self._parse_datetime(date_str)
# Update date return DomainCheckResult(
if any(x in action for x in ['changed', 'update', 'last changed']): domain=domain,
updated_date = self._parse_datetime(date_str) status=DomainStatus.TAKEN,
is_available=False,
registrar=registrar,
expiration_date=expiration_date,
creation_date=creation_date,
updated_date=updated_date,
name_servers=name_servers if name_servers else None,
check_method="rdap_custom",
)
# Parse nameservers # Other status codes - try fallback
nameservers = data.get('nameservers', []) logger.warning(f"Custom RDAP returned {response.status_code} for {domain}")
for ns in nameservers: return None
if isinstance(ns, dict):
ns_name = ns.get('ldhName', '')
if ns_name:
name_servers.append(ns_name.lower())
# Parse registrar from entities - check multiple roles
entities = data.get('entities', [])
for entity in entities:
roles = entity.get('roles', [])
# Look for registrar or technical contact as registrar source
if any(r in roles for r in ['registrar', 'technical']):
# Try vcardArray first
vcard = entity.get('vcardArray', [])
if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]:
if isinstance(item, list) and len(item) > 3:
if item[0] in ('fn', 'org') and item[3]:
registrar = str(item[3])
break
# Try handle as fallback
if not registrar:
handle = entity.get('handle', '')
if handle:
registrar = handle
if registrar:
break
# For .de domains: DENIC doesn't expose expiration via RDAP
# We need to use WHOIS as fallback for expiration date
if tld == 'de' and not expiration_date:
logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
# Return what we have, scheduler will update via WHOIS later
return DomainCheckResult(
domain=domain,
status=DomainStatus.TAKEN,
is_available=False,
registrar=registrar,
expiration_date=expiration_date,
creation_date=creation_date,
updated_date=updated_date,
name_servers=name_servers if name_servers else None,
check_method="rdap_custom",
)
# Other status codes - try fallback
logger.warning(f"Custom RDAP returned {response.status_code} for {domain}")
return None
except httpx.TimeoutException: except httpx.TimeoutException:
logger.warning(f"Custom RDAP timeout for {domain}") logger.warning(f"Custom RDAP timeout for {domain}")
@ -305,74 +293,64 @@ class DomainChecker:
url = f"{self.IANA_BOOTSTRAP_URL}{domain}" url = f"{self.IANA_BOOTSTRAP_URL}{domain}"
try: try:
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client: client = await get_rdap_http_client()
response = await client.get(url) response = await client.get(url, timeout=15.0)
if response.status_code == 404:
return DomainCheckResult(
domain=domain,
status=DomainStatus.AVAILABLE,
is_available=True,
check_method="rdap_iana",
)
if response.status_code == 429:
logger.warning(f"RDAP rate limited for {domain}")
return None
if response.status_code != 200:
return None
data = response.json()
# Parse events for dates
expiration_date = None
creation_date = None
registrar = None
for event in data.get('events', []):
action = event.get('eventAction', '').lower()
date_str = event.get('eventDate', '')
if 'expiration' in action and date_str:
expiration_date = self._parse_datetime(date_str)
elif 'registration' in action and date_str:
creation_date = self._parse_datetime(date_str)
# Extract registrar
for entity in data.get('entities', []):
roles = entity.get('roles', [])
if 'registrar' in roles:
vcard = entity.get('vcardArray', [])
if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]:
if isinstance(item, list) and len(item) > 3:
if item[0] == 'fn' and item[3]:
registrar = str(item[3])
break
# Check status for pending delete
status_list = data.get('status', [])
status_str = ' '.join(str(s).lower() for s in status_list)
is_dropping = any(x in status_str for x in [
'pending delete', 'pendingdelete',
'redemption period', 'redemptionperiod',
])
if is_dropping:
return DomainCheckResult(
domain=domain,
status=DomainStatus.DROPPING_SOON,
is_available=False,
registrar=registrar,
expiration_date=expiration_date,
creation_date=creation_date,
check_method="rdap_iana",
)
if response.status_code == 404:
return DomainCheckResult( return DomainCheckResult(
domain=domain, domain=domain,
status=DomainStatus.TAKEN, status=DomainStatus.AVAILABLE,
is_available=True,
check_method="rdap_iana",
)
if response.status_code == 429:
logger.warning(f"RDAP rate limited for {domain}")
return None
if response.status_code != 200:
return None
data = response.json()
# Parse events for dates
expiration_date = None
creation_date = None
registrar = None
for event in data.get('events', []):
action = event.get('eventAction', '').lower()
date_str = event.get('eventDate', '')
if 'expiration' in action and date_str:
expiration_date = self._parse_datetime(date_str)
elif 'registration' in action and date_str:
creation_date = self._parse_datetime(date_str)
# Extract registrar
for entity in data.get('entities', []):
roles = entity.get('roles', [])
if 'registrar' in roles:
vcard = entity.get('vcardArray', [])
if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]:
if isinstance(item, list) and len(item) > 3:
if item[0] == 'fn' and item[3]:
registrar = str(item[3])
break
# Check status for pending delete
status_list = data.get('status', [])
status_str = ' '.join(str(s).lower() for s in status_list)
is_dropping = any(x in status_str for x in [
'pending delete', 'pendingdelete',
'redemption period', 'redemptionperiod',
])
if is_dropping:
return DomainCheckResult(
domain=domain,
status=DomainStatus.DROPPING_SOON,
is_available=False, is_available=False,
registrar=registrar, registrar=registrar,
expiration_date=expiration_date, expiration_date=expiration_date,
@ -380,6 +358,16 @@ class DomainChecker:
check_method="rdap_iana", check_method="rdap_iana",
) )
return DomainCheckResult(
domain=domain,
status=DomainStatus.TAKEN,
is_available=False,
registrar=registrar,
expiration_date=expiration_date,
creation_date=creation_date,
check_method="rdap_iana",
)
except httpx.TimeoutException: except httpx.TimeoutException:
logger.debug(f"IANA RDAP timeout for {domain}") logger.debug(f"IANA RDAP timeout for {domain}")
return None return None

View File

@ -15,6 +15,8 @@ from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from typing import Optional from typing import Optional
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ============================================================================ # ============================================================================
@ -49,30 +51,26 @@ class DropStatus:
check_method: str = "rdap" check_method: str = "rdap"
async def _make_rdap_request(url: str, domain: str) -> Optional[dict]: async def _make_rdap_request(client: httpx.AsyncClient, url: str, domain: str) -> Optional[dict]:
"""Make a single RDAP request with proper error handling.""" """Make a single RDAP request with proper error handling."""
try: try:
async with httpx.AsyncClient( resp = await client.get(url, timeout=RDAP_TIMEOUT)
timeout=RDAP_TIMEOUT,
follow_redirects=True, # Important for IANA Bootstrap
) as client:
resp = await client.get(url)
if resp.status_code == 404: if resp.status_code == 404:
# Domain not found = available # Domain not found = available
return {"_available": True, "_status_code": 404} return {"_available": True, "_status_code": 404}
if resp.status_code == 200: if resp.status_code == 200:
data = resp.json() data = resp.json()
data["_status_code"] = 200 data["_status_code"] = 200
return data return data
if resp.status_code == 429: if resp.status_code == 429:
logger.warning(f"RDAP rate limited for {domain}") logger.warning(f"RDAP rate limited for {domain}")
return {"_rate_limited": True, "_status_code": 429} return {"_rate_limited": True, "_status_code": 429}
logger.warning(f"RDAP returned {resp.status_code} for {domain}") logger.warning(f"RDAP returned {resp.status_code} for {domain}")
return None return None
except httpx.TimeoutException: except httpx.TimeoutException:
logger.debug(f"RDAP timeout for {domain} at {url}") logger.debug(f"RDAP timeout for {domain} at {url}")
@ -102,16 +100,17 @@ async def check_drop_status(domain: str) -> DropStatus:
# Try preferred endpoint first # Try preferred endpoint first
data = None data = None
check_method = "rdap" check_method = "rdap"
client = await get_rdap_http_client()
if tld in PREFERRED_ENDPOINTS: if tld in PREFERRED_ENDPOINTS:
url = f"{PREFERRED_ENDPOINTS[tld]}{domain}" url = f"{PREFERRED_ENDPOINTS[tld]}{domain}"
data = await _make_rdap_request(url, domain) data = await _make_rdap_request(client, url, domain)
check_method = f"rdap_{tld}" check_method = f"rdap_{tld}"
# Fall back to IANA Bootstrap if no data yet # Fall back to IANA Bootstrap if no data yet
if data is None: if data is None:
url = f"{IANA_BOOTSTRAP}{domain}" url = f"{IANA_BOOTSTRAP}{domain}"
data = await _make_rdap_request(url, domain) data = await _make_rdap_request(client, url, domain)
check_method = "rdap_iana" check_method = "rdap_iana"
# Still no data? Return unknown # Still no data? Return unknown

View File

@ -0,0 +1,70 @@
"""
Shared HTTP clients for performance.
Why:
- Creating a new httpx.AsyncClient per request is expensive (TLS handshakes, no connection reuse).
- For high-frequency lookups (RDAP), we keep one pooled AsyncClient per process.
Notes:
- Per-request timeouts can still be overridden in client.get(..., timeout=...).
- Call close_* on shutdown for clean exit (optional but recommended).
"""
from __future__ import annotations
import asyncio
from typing import Optional
import httpx
_rdap_client: Optional[httpx.AsyncClient] = None
_rdap_client_lock = asyncio.Lock()
def _rdap_limits() -> httpx.Limits:
    """Connection-pool limits for the shared RDAP client (tuned for bursty traffic)."""
    return httpx.Limits(
        max_connections=50,
        max_keepalive_connections=20,
        keepalive_expiry=30.0,
    )
def _rdap_timeout() -> httpx.Timeout:
    """Default timeout for RDAP requests; callers may override per request."""
    # 15s overall budget, but fail fast (5s) on connection establishment.
    default = httpx.Timeout(15.0, connect=5.0)
    return default
async def get_rdap_http_client() -> httpx.AsyncClient:
    """
    Return the process-wide httpx.AsyncClient used for RDAP lookups.

    The client is created lazily on first use and reused afterwards.
    Safe for concurrent callers on the same event loop: creation is
    double-checked under an asyncio.Lock so only one client is built.
    """
    global _rdap_client
    # Fast path: an open client already exists — hand it out lock-free.
    existing = _rdap_client
    if existing is not None and not existing.is_closed:
        return existing
    async with _rdap_client_lock:
        # Re-check inside the lock: another coroutine may have built (or
        # rebuilt) the client while we were waiting.
        if _rdap_client is None or _rdap_client.is_closed:
            _rdap_client = httpx.AsyncClient(
                timeout=_rdap_timeout(),
                follow_redirects=True,
                limits=_rdap_limits(),
                headers={
                    # Be a good citizen; many registries/redirectors are sensitive.
                    "User-Agent": "pounce/1.0 (+https://pounce.ch)",
                    "Accept": "application/rdap+json, application/json",
                },
            )
        return _rdap_client
async def close_rdap_http_client() -> None:
    """Close and discard the shared RDAP client (best-effort).

    Always clears the module-level reference, even if closing raises,
    so a later get_rdap_http_client() builds a fresh client.
    """
    global _rdap_client
    # Detach the global first; mirrors the original's finally-clause reset.
    client, _rdap_client = _rdap_client, None
    if client is not None and not client.is_closed:
        await client.aclose()

View File

@ -18,6 +18,7 @@ load_dotenv()
from app.config import get_settings from app.config import get_settings
from app.database import init_db from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler from app.scheduler import start_scheduler, stop_scheduler
from app.services.http_client_pool import close_rdap_http_client
logging.basicConfig( logging.basicConfig(
level=logging.INFO, level=logging.INFO,
@ -54,6 +55,7 @@ async def main() -> None:
await stop_event.wait() await stop_event.wait()
stop_scheduler() stop_scheduler()
await close_rdap_http_client()
logger.info("Scheduler stopped. Bye.") logger.info("Scheduler stopped. Bye.")