Add LLM gateway proxy endpoint (Trader/Tycoon)
Some checks failed: all CI and Deploy checks for this push were cancelled.

2025-12-17 13:12:45 +01:00
parent 19cd61f3d3
commit bd3046b782
4 changed files with 155 additions and 0 deletions


@@ -27,6 +27,7 @@ from app.api.analyze import router as analyze_router
from app.api.hunt import router as hunt_router
from app.api.cfo import router as cfo_router
from app.api.drops import router as drops_router
from app.api.llm import router as llm_router

api_router = APIRouter()
@@ -45,6 +46,7 @@ api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
api_router.include_router(drops_router, tags=["Drops - Zone Files"])
api_router.include_router(llm_router, tags=["LLM"])

# Marketplace (For Sale) - from analysis_3.md
api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])

backend/app/api/llm.py (new file, +93)

@@ -0,0 +1,93 @@
"""
LLM API endpoints (Pounce -> Ollama Gateway).

This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""
from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream

router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to the internal Ollama gateway.
    Returns OpenAI-ish JSON, or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e))
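
For reference, a minimal client sketch (not part of the commit) showing how the new endpoint would be called from an authenticated session. The base path /api/v1 and the cookie name are assumptions not visible in this diff; adjust them to the real deployment.

import asyncio

import httpx


async def demo() -> None:
    # Hypothetical: the Pounce HttpOnly auth cookie obtained at login.
    cookies = {"access_token": "<session cookie value>"}
    async with httpx.AsyncClient(base_url="https://pounce.example/api/v1", cookies=cookies) as client:
        r = await client.post(
            "/llm/chat/completions",
            json={
                "messages": [{"role": "user", "content": "Value this domain: coolcats.io"}],
                "temperature": 0.2,
                "stream": False,
            },
        )
        r.raise_for_status()  # 403 if the account is not Trader/Tycoon, 502 if the gateway fails
        print(r.json())


asyncio.run(demo())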


@@ -117,6 +117,13 @@ class Settings(BaseSettings):
    moz_access_id: str = ""
    moz_secret_key: str = ""

    # =================================
    # LLM Gateway (Ollama / Mistral Nemo)
    # =================================
    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
    llm_gateway_api_key: str = ""
    llm_default_model: str = "mistral-nemo:latest"

    # ICANN CZDS (Centralized Zone Data Service)
    # For downloading gTLD zone files (.com, .net, .org, etc.)
    # Register at: https://czds.icann.org/
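
A minimal sketch of overriding these settings via the environment, assuming the project's Settings class uses pydantic-settings' default field-to-env mapping (upper-cased field names); any env_prefix is not visible in this diff, so the variable names are an assumption.

# Hypothetical environment overrides for the new LLM gateway settings.
# Set them before the app loads its config, since get_settings() is
# typically cached for the process lifetime.
import os

os.environ.setdefault("LLM_GATEWAY_URL", "http://127.0.0.1:8812")
os.environ.setdefault("LLM_GATEWAY_API_KEY", "replace-with-gateway-key")
os.environ.setdefault("LLM_DEFAULT_MODEL", "mistral-nemo:latest")

from app.config import get_settings

settings = get_settings()
# An empty key makes every /llm call fail with a 502 from the proxy.
assert settings.llm_gateway_api_key, "llm_gateway_api_key must be set"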


@@ -0,0 +1,53 @@
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

import httpx

from app.config import get_settings

settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                body = await r.aread()
                raise LLMGatewayError(
                    f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8', 'ignore')}"
                )
            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
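
A minimal sketch (not part of the commit) of consuming the streaming helper directly, e.g. from a script or test. It assumes the gateway emits OpenAI-style SSE events ("data: {...}" lines separated by blank lines, ending with "data: [DONE]"), which this diff implies but does not show.

import asyncio

from app.services.llm_gateway import chat_completions_stream


async def main() -> None:
    payload = {
        "model": "mistral-nemo:latest",
        "messages": [{"role": "user", "content": "Say hello"}],
        "stream": True,
    }
    buffer = b""
    async for chunk in chat_completions_stream(payload):
        buffer += chunk
        # SSE events are separated by a blank line; print each "data:" payload as it arrives.
        while b"\n\n" in buffer:
            event, buffer = buffer.split(b"\n\n", 1)
            for line in event.splitlines():
                if line.startswith(b"data: "):
                    print(line[len(b"data: "):].decode("utf-8", "ignore"))


asyncio.run(main())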