Add LLM gateway proxy endpoint (Trader/Tycoon)
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
@@ -27,6 +27,7 @@ from app.api.analyze import router as analyze_router
 from app.api.hunt import router as hunt_router
 from app.api.cfo import router as cfo_router
 from app.api.drops import router as drops_router
+from app.api.llm import router as llm_router
 
 api_router = APIRouter()
 
@@ -45,6 +46,7 @@ api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
 api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
 api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
 api_router.include_router(drops_router, tags=["Drops - Zone Files"])
+api_router.include_router(llm_router, tags=["LLM"])
 
 # Marketplace (For Sale) - from analysis_3.md
 api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])
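For orientation, a minimal wiring sketch of how the new route resolves. The aggregate router's module path and the mount prefix are not shown in this commit, so app.api and /api/v1 below are assumptions.

# Sketch only -- module path and mount prefix are assumptions, not shown in the diff.
from fastapi import FastAPI

from app.api import api_router  # assumed location of the aggregate router edited above

app = FastAPI()
app.include_router(api_router, prefix="/api/v1")  # assumed versioned prefix

# llm_router is declared with prefix="/llm" and exposes POST /chat/completions,
# so under this assumed mount the proxy lives at POST /api/v1/llm/chat/completions.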
backend/app/api/llm.py (new file, 93 lines)
@@ -0,0 +1,93 @@
"""
LLM API endpoints (Pounce -> Ollama Gateway).

This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""

from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream


router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to internal Ollama gateway.
    Returns OpenAI-ish JSON or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e))
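As a usage illustration, a hedged client-side sketch of calling the new endpoint. The host, the /api/v1 mount prefix, and the auth cookie name are not part of this commit and are placeholders here.

# Sketch only -- host, prefix, and cookie name are assumptions.
import asyncio

import httpx


async def main() -> None:
    cookies = {"access_token": "<session cookie from login>"}  # assumed cookie name
    body = {
        "messages": [{"role": "user", "content": "Give me a quick take on this domain."}],
        "stream": False,
    }
    async with httpx.AsyncClient(base_url="https://pounce.example", cookies=cookies) as client:
        r = await client.post("/api/v1/llm/chat/completions", json=body, timeout=60)
        r.raise_for_status()
        print(r.json())  # OpenAI-ish JSON proxied from the gateway


asyncio.run(main())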
@@ -117,6 +117,13 @@ class Settings(BaseSettings):
     moz_access_id: str = ""
     moz_secret_key: str = ""
+
+    # =================================
+    # LLM Gateway (Ollama / Mistral Nemo)
+    # =================================
+    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
+    llm_gateway_api_key: str = ""
+    llm_default_model: str = "mistral-nemo:latest"
 
     # ICANN CZDS (Centralized Zone Data Service)
     # For downloading gTLD zone files (.com, .net, .org, etc.)
     # Register at: https://czds.icann.org/
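Since these are pydantic BaseSettings fields, they are normally supplied via the environment or a .env file. A minimal sketch, assuming the default field-name-to-env-var mapping with no env_prefix (the Settings model config is not shown in this hunk):

# Sketch only -- the exact env mapping depends on the Settings model config.
#
#   LLM_GATEWAY_URL=http://127.0.0.1:8812
#   LLM_GATEWAY_API_KEY=<gateway bearer token>
#   LLM_DEFAULT_MODEL=mistral-nemo:latest
#
from app.config import get_settings

settings = get_settings()
if not settings.llm_gateway_api_key:
    # Mirrors the guard in app/services/llm_gateway._auth_headers(): with no key,
    # the /llm proxy raises LLMGatewayError instead of reaching the gateway.
    print("LLM gateway not configured: set LLM_GATEWAY_API_KEY to enable the /llm proxy")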
backend/app/services/llm_gateway.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

import httpx

from app.config import get_settings


settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                body = await r.aread()
                raise LLMGatewayError(f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8', 'ignore')}")

            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
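For completeness, a hedged smoke-test sketch of the service layer on its own, outside FastAPI. The model name and prompt are placeholders, and the SSE parsing is deliberately left to the caller, as the module's docstring notes.

# Sketch only -- assumes the gateway settings above are configured.
import asyncio

from app.services.llm_gateway import chat_completions, chat_completions_stream


async def main() -> None:
    payload = {
        "model": "mistral-nemo:latest",
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": True,
    }
    # The stream yields raw SSE bytes exactly as the gateway sent them;
    # parsing the "data: ..." lines is up to the consumer (the frontend, in Pounce).
    async for chunk in chat_completions_stream(payload):
        print(chunk.decode("utf-8", "ignore"), end="")

    # Non-streaming variant returns the parsed JSON document instead.
    payload["stream"] = False
    result = await chat_completions(payload)
    print(result.get("choices", [{}])[0].get("message", {}).get("content", ""))


asyncio.run(main())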