Add LLM gateway proxy endpoint (Trader/Tycoon)
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
@@ -27,6 +27,7 @@ from app.api.analyze import router as analyze_router
from app.api.hunt import router as hunt_router
from app.api.cfo import router as cfo_router
from app.api.drops import router as drops_router
from app.api.llm import router as llm_router

api_router = APIRouter()

@@ -45,6 +46,7 @@ api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
api_router.include_router(drops_router, tags=["Drops - Zone Files"])
api_router.include_router(llm_router, tags=["LLM"])

# Marketplace (For Sale) - from analysis_3.md
api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])
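For orientation, a self-contained sketch of how the prefixes compose once api_router is mounted on the app; the /api/v1 mount prefix is an assumption and is not part of this diff:

# Hypothetical wiring sketch; the /api/v1 mount prefix is assumed, not shown in this commit.
from fastapi import APIRouter, FastAPI

llm_router = APIRouter(prefix="/llm", tags=["LLM"])  # mirrors backend/app/api/llm.py below

@llm_router.post("/chat/completions")
async def chat_completions_stub() -> dict:
    """Stand-in for the real proxy handler."""
    return {"ok": True}

api_router = APIRouter()
api_router.include_router(llm_router, tags=["LLM"])  # as in the hunk above: no extra prefix

app = FastAPI()
app.include_router(api_router, prefix="/api/v1")  # assumption: versioned API prefix
print([route.path for route in app.routes if "llm" in route.path])
# -> ['/api/v1/llm/chat/completions']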
backend/app/api/llm.py (new file, 93 lines)
@@ -0,0 +1,93 @@
"""
LLM API endpoints (Pounce -> Ollama Gateway).

This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""

from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream


router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to internal Ollama gateway.
    Returns OpenAI-ish JSON or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e))
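For reference, a minimal client-side sketch of calling the new endpoint. The base URL, the /api/v1 prefix, the session cookie name, and the response shape shown here are assumptions, not part of this diff:

# Hypothetical client sketch; host, route prefix, and cookie name are assumptions.
import httpx

BASE_URL = "https://pounce.example"                      # placeholder host
COOKIES = {"session": "<HttpOnly session cookie value>"}  # assumed cookie name set by Pounce auth

body = {
    "messages": [
        {"role": "system", "content": "You are a domain-investing assistant."},
        {"role": "user", "content": "Is a short numeric .io domain worth watching?"},
    ],
    "temperature": 0.7,
    "stream": False,
}

with httpx.Client(base_url=BASE_URL, cookies=COOKIES, timeout=60) as client:
    r = client.post("/api/v1/llm/chat/completions", json=body)  # prefix is an assumption
    r.raise_for_status()
    data = r.json()
    # If the gateway is OpenAI-compatible, the reply text is typically found here:
    print(data.get("choices", [{}])[0].get("message", {}).get("content"))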
@@ -116,6 +116,13 @@ class Settings(BaseSettings):
    # Moz API (SEO Data)
    moz_access_id: str = ""
    moz_secret_key: str = ""

    # =================================
    # LLM Gateway (Ollama / Mistral Nemo)
    # =================================
    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
    llm_gateway_api_key: str = ""
    llm_default_model: str = "mistral-nemo:latest"

    # ICANN CZDS (Centralized Zone Data Service)
    # For downloading gTLD zone files (.com, .net, .org, etc.)
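A minimal sketch of how the three new settings might be supplied, assuming the project's Settings class uses the usual pydantic-settings environment mapping with no env_prefix (not confirmed by this diff):

# Hypothetical environment configuration for the new settings.
# Assumes default pydantic-settings env mapping (field name -> upper-case env var).
import os

os.environ["LLM_GATEWAY_URL"] = "http://127.0.0.1:8812"  # reverse tunnel to the Ollama host
os.environ["LLM_GATEWAY_API_KEY"] = "replace-me"         # placeholder; keep real keys out of source control
os.environ["LLM_DEFAULT_MODEL"] = "mistral-nemo:latest"

from app.config import get_settings

settings = get_settings()  # if get_settings() caches, set env vars before the first call
print(settings.llm_gateway_url, settings.llm_default_model)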
backend/app/services/llm_gateway.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

import httpx

from app.config import get_settings


settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                body = await r.aread()
                raise LLMGatewayError(f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8','ignore')}")

            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
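As a usage note, a small sketch of calling these helpers directly outside the FastAPI route, assuming the gateway settings above are populated; the payload mirrors what llm.py builds:

# Hypothetical usage sketch for the service helpers above.
import asyncio

from app.services.llm_gateway import chat_completions, chat_completions_stream


async def main() -> None:
    payload = {
        "model": "mistral-nemo:latest",
        "messages": [{"role": "user", "content": "One-line tagline for a domain tool?"}],
        "temperature": 0.3,
        "stream": False,
    }
    data = await chat_completions(payload)
    print(data)

    # Streaming variant: the helper yields raw SSE bytes from the gateway,
    # so a caller that wants plain text still has to parse the "data: ..." lines.
    payload["stream"] = True
    async for chunk in chat_completions_stream(payload):
        print(chunk.decode("utf-8", "ignore"), end="")


asyncio.run(main())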