"""
LLM API endpoints (Pounce -> Ollama Gateway).
This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""
from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream

router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None  # falls back to settings.llm_default_model when omitted
    # Required: at least one message. A default_factory=list here would let a body
    # without "messages" bypass min_length, since Pydantic does not validate defaults.
    messages: list[ChatMessage] = Field(..., min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False
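
# Validation notes for ChatCompletionsRequest above (illustrative values): a body
# like {"messages": [{"role": "tool", "content": "x"}]} is rejected with 422
# because "tool" is outside the allowed Literal roles, and "temperature": 3.0
# fails the le=2.0 bound. Unknown top-level keys are ignored (Pydantic v2 default).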


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(
        user_id=user_id,
        tier=SubscriptionTier.SCOUT,
        max_domains=5,
        check_frequency="daily",
    )
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to the internal Ollama gateway.

    Returns OpenAI-style JSON, or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": req.model or settings.llm_default_model,
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            # Pass the gateway's SSE chunks through unchanged.
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        # Surface gateway failures as 502 rather than an unhandled 500.
        raise HTTPException(status_code=502, detail=str(e)) from e