from __future__ import annotations

from typing import Any, AsyncIterator

import httpx

from app.config import get_settings

settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway.
    The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    # No read timeout: an SSE response can stay open for a long time between chunks.
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                # Error responses are not streamed, so read the full body for the message.
                body = await r.aread()
                raise LLMGatewayError(
                    f"LLM gateway stream error: {r.status_code} "
                    f"{body[:500].decode('utf-8', 'ignore')}"
                )
            # Pass the gateway's SSE bytes through untouched.
            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
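

# --- Usage sketch (illustrative only) ----------------------------------------
# A minimal way to exercise the two helpers above from the command line,
# assuming the gateway URL and API key are already configured in settings.
# The model name, prompt, "stream" flag, and the OpenAI-style response shape
# accessed below are assumptions based on the "OpenAI-ish format" docstring,
# not values confirmed by this module.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        payload = {
            "model": "example-model",  # placeholder; use a model your gateway serves
            "messages": [{"role": "user", "content": "Say hello."}],
        }

        # Non-streaming: one request, one JSON body back.
        resp = await chat_completions(payload)
        print(resp["choices"][0]["message"]["content"])

        # Streaming: consume the proxied SSE bytes and print them as they arrive.
        async for chunk in chat_completions_stream({**payload, "stream": True}):
            print(chunk.decode("utf-8", "ignore"), end="", flush=True)

    asyncio.run(_demo())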