perf: Reuse pooled http client for RDAP

This commit is contained in:
2025-12-21 15:50:59 +01:00
parent 4ec86789cf
commit 9205536bf2
5 changed files with 269 additions and 208 deletions

View File

@ -19,6 +19,7 @@ from app.config import get_settings
from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler
from app.observability.metrics import instrument_app
from app.services.http_client_pool import close_rdap_http_client
# Configure logging
logging.basicConfig(
@ -59,6 +60,7 @@ async def lifespan(app: FastAPI):
# Shutdown
if settings.enable_scheduler:
stop_scheduler()
await close_rdap_http_client()
logger.info("Application shutdown complete")

View File

@ -22,6 +22,7 @@ import whodap
import httpx
from app.models.domain import DomainStatus
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__)
@ -164,8 +165,8 @@ class DomainChecker:
url = f"{endpoint}{domain}"
try:
async with httpx.AsyncClient(timeout=10.0) as client:
response = await client.get(url, follow_redirects=True)
client = await get_rdap_http_client()
response = await client.get(url, timeout=10.0)
if response.status_code == 404:
# Domain not found = available
@ -181,15 +182,14 @@ class DomainChecker:
data = response.json()
# Check if domain is pending deletion (dropped but not yet purged)
# These domains are effectively available for registration
domain_status = data.get('status', [])
domain_status = data.get("status", [])
pending_delete_statuses = [
'pending delete',
'pendingdelete',
'redemption period',
'redemptionperiod',
'pending purge',
'pendingpurge',
"pending delete",
"pendingdelete",
"redemption period",
"redemptionperiod",
"pending purge",
"pendingpurge",
]
is_pending_delete = any(
@ -198,7 +198,9 @@ class DomainChecker:
)
if is_pending_delete:
logger.info(f"{domain} is in transition/pending delete (status: {domain_status})")
logger.info(
f"{domain} is in transition/pending delete (status: {domain_status})"
)
return DomainCheckResult(
domain=domain,
status=DomainStatus.DROPPING_SOON, # In transition, not yet available
@ -212,65 +214,51 @@ class DomainChecker:
creation_date = None
updated_date = None
registrar = None
name_servers = []
name_servers: list[str] = []
# Parse events - different registries use different event actions
# SWITCH (.ch/.li): uses "expiration"
# DENIC (.de): uses "last changed" but no expiration in RDAP (only WHOIS)
events = data.get('events', [])
# Parse events
events = data.get("events", [])
for event in events:
action = event.get('eventAction', '').lower()
date_str = event.get('eventDate', '')
action = event.get("eventAction", "").lower()
date_str = event.get("eventDate", "")
# Expiration date - check multiple variations
if not expiration_date:
if any(x in action for x in ['expiration', 'expire']):
if not expiration_date and any(x in action for x in ["expiration", "expire"]):
expiration_date = self._parse_datetime(date_str)
# Creation/registration date
if not creation_date:
if any(x in action for x in ['registration', 'created']):
if not creation_date and any(x in action for x in ["registration", "created"]):
creation_date = self._parse_datetime(date_str)
# Update date
if any(x in action for x in ['changed', 'update', 'last changed']):
if any(x in action for x in ["changed", "update", "last changed"]):
updated_date = self._parse_datetime(date_str)
# Parse nameservers
nameservers = data.get('nameservers', [])
for ns in nameservers:
for ns in data.get("nameservers", []):
if isinstance(ns, dict):
ns_name = ns.get('ldhName', '')
ns_name = ns.get("ldhName", "")
if ns_name:
name_servers.append(ns_name.lower())
# Parse registrar from entities - check multiple roles
entities = data.get('entities', [])
for entity in entities:
roles = entity.get('roles', [])
# Look for registrar or technical contact as registrar source
if any(r in roles for r in ['registrar', 'technical']):
# Try vcardArray first
vcard = entity.get('vcardArray', [])
# Parse registrar from entities
for entity in data.get("entities", []):
roles = entity.get("roles", [])
if any(r in roles for r in ["registrar", "technical"]):
vcard = entity.get("vcardArray", [])
if isinstance(vcard, list) and len(vcard) > 1:
for item in vcard[1]:
if isinstance(item, list) and len(item) > 3:
if item[0] in ('fn', 'org') and item[3]:
if item[0] in ("fn", "org") and item[3]:
registrar = str(item[3])
break
# Try handle as fallback
if not registrar:
handle = entity.get('handle', '')
handle = entity.get("handle", "")
if handle:
registrar = handle
if registrar:
break
# For .de domains: DENIC doesn't expose expiration via RDAP
# We need to use WHOIS as fallback for expiration date
if tld == 'de' and not expiration_date:
if tld == "de" and not expiration_date:
logger.debug(f"No expiration in RDAP for {domain}, will try WHOIS")
# Return what we have, scheduler will update via WHOIS later
return DomainCheckResult(
domain=domain,
@ -305,8 +293,8 @@ class DomainChecker:
url = f"{self.IANA_BOOTSTRAP_URL}{domain}"
try:
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
response = await client.get(url)
client = await get_rdap_http_client()
response = await client.get(url, timeout=15.0)
if response.status_code == 404:
return DomainCheckResult(

View File

@ -15,6 +15,8 @@ from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from app.services.http_client_pool import get_rdap_http_client
logger = logging.getLogger(__name__)
# ============================================================================
@ -49,14 +51,10 @@ class DropStatus:
check_method: str = "rdap"
async def _make_rdap_request(url: str, domain: str) -> Optional[dict]:
async def _make_rdap_request(client: httpx.AsyncClient, url: str, domain: str) -> Optional[dict]:
"""Make a single RDAP request with proper error handling."""
try:
async with httpx.AsyncClient(
timeout=RDAP_TIMEOUT,
follow_redirects=True, # Important for IANA Bootstrap
) as client:
resp = await client.get(url)
resp = await client.get(url, timeout=RDAP_TIMEOUT)
if resp.status_code == 404:
# Domain not found = available
@ -102,16 +100,17 @@ async def check_drop_status(domain: str) -> DropStatus:
# Try preferred endpoint first
data = None
check_method = "rdap"
client = await get_rdap_http_client()
if tld in PREFERRED_ENDPOINTS:
url = f"{PREFERRED_ENDPOINTS[tld]}{domain}"
data = await _make_rdap_request(url, domain)
data = await _make_rdap_request(client, url, domain)
check_method = f"rdap_{tld}"
# Fall back to IANA Bootstrap if no data yet
if data is None:
url = f"{IANA_BOOTSTRAP}{domain}"
data = await _make_rdap_request(url, domain)
data = await _make_rdap_request(client, url, domain)
check_method = "rdap_iana"
# Still no data? Return unknown

View File

@ -0,0 +1,70 @@
"""
Shared HTTP clients for performance.
Why:
- Creating a new httpx.AsyncClient per request is expensive (TLS handshakes, no connection reuse).
- For high-frequency lookups (RDAP), we keep one pooled AsyncClient per process.
Notes:
- Per-request timeouts can still be overridden in client.get(..., timeout=...).
- Call close_* on shutdown for clean exit (optional but recommended).
"""
from __future__ import annotations
import asyncio
from typing import Optional
import httpx
# Process-wide pooled client for RDAP lookups; built lazily on first use.
_rdap_client: Optional[httpx.AsyncClient] = None
# Serializes lazy creation so concurrent first callers don't build two clients.
# NOTE(review): created at import time — on Python < 3.10 asyncio.Lock binds to
# the loop at construction; confirm the target runtime is 3.10+.
_rdap_client_lock = asyncio.Lock()
def _rdap_limits() -> httpx.Limits:
    """Connection-pool limits for the shared RDAP client."""
    # Modest hard cap with a smaller keep-alive pool; idle connections are
    # recycled after 30s. Chosen as a safe default for bursty lookup traffic.
    return httpx.Limits(
        max_connections=50,
        max_keepalive_connections=20,
        keepalive_expiry=30.0,
    )
def _rdap_timeout() -> httpx.Timeout:
    """Default timeout configuration for the shared RDAP client."""
    # 15s total budget with a 5s connect phase; callers may still override
    # per request via client.get(..., timeout=...).
    return httpx.Timeout(15.0, connect=5.0)
async def get_rdap_http_client() -> httpx.AsyncClient:
    """
    Return the shared httpx.AsyncClient used for RDAP lookups.

    Lazily creates the client on first use (or after it has been closed)
    and reuses it afterwards. Safe for concurrent callers running on the
    same event loop.
    """
    global _rdap_client

    def _usable(candidate: Optional[httpx.AsyncClient]) -> bool:
        # A client counts as usable only while it is open.
        return candidate is not None and not candidate.is_closed

    # Fast path: a live client already exists, no locking needed.
    if _usable(_rdap_client):
        return _rdap_client

    # Slow path: serialize creation, re-checking under the lock so only
    # one coroutine ever constructs the client.
    async with _rdap_client_lock:
        if not _usable(_rdap_client):
            _rdap_client = httpx.AsyncClient(
                timeout=_rdap_timeout(),
                follow_redirects=True,
                limits=_rdap_limits(),
                headers={
                    # Be a good citizen; many registries/redirectors are sensitive.
                    "User-Agent": "pounce/1.0 (+https://pounce.ch)",
                    "Accept": "application/rdap+json, application/json",
                },
            )
        return _rdap_client
async def close_rdap_http_client() -> None:
    """Best-effort shutdown of the shared RDAP client; safe to call twice."""
    global _rdap_client
    client = _rdap_client
    if client is None:
        # Nothing was ever created (or it was already torn down).
        return
    try:
        if not client.is_closed:
            await client.aclose()
    finally:
        # Drop the reference even if aclose() raised, so a later
        # get_rdap_http_client() call builds a fresh client.
        _rdap_client = None

View File

@ -18,6 +18,7 @@ load_dotenv()
from app.config import get_settings
from app.database import init_db
from app.scheduler import start_scheduler, stop_scheduler
from app.services.http_client_pool import close_rdap_http_client
logging.basicConfig(
level=logging.INFO,
@ -54,6 +55,7 @@ async def main() -> None:
await stop_event.wait()
stop_scheduler()
await close_rdap_http_client()
logger.info("Scheduler stopped. Bye.")