"""
|
|
LLM API endpoints (Pounce -> Ollama Gateway).
|
|
|
|
This is intentionally a thin proxy:
|
|
- Enforces Pounce authentication (HttpOnly cookie)
|
|
- Enforces tier gating (Trader/Tycoon)
|
|
- Proxies to the internal LLM gateway (which talks to Ollama)
|
|
"""

from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream


router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False
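

# Users without a subscription row get a default Scout-tier record created lazily on first use.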
async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub
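

# Chat is restricted to the Trader and Tycoon tiers; any other tier gets a 403 with an upgrade prompt.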
def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to internal Ollama gateway.
    Returns OpenAI-ish JSON or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)
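
    # Build the gateway payload: fall back to the configured default model when the client
    # doesn't specify one; temperature may be None (the gateway presumably applies its own default).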
    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }
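
    # Both paths forward the same payload; gateway failures surface as a 502 below.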
    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e)) from e