""" LLM API endpoints (Pounce -> Ollama Gateway). This is intentionally a thin proxy: - Enforces Pounce authentication (HttpOnly cookie) - Enforces tier gating (Trader/Tycoon) - Proxies to the internal LLM gateway (which talks to Ollama) """ from __future__ import annotations from typing import Any, Literal, Optional from fastapi import APIRouter, Depends, HTTPException, Query, Request, status from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, Field from sqlalchemy import select from app.api.deps import CurrentUser, Database from app.config import get_settings from app.models.subscription import Subscription, SubscriptionTier from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream router = APIRouter(prefix="/llm", tags=["LLM"]) settings = get_settings() class ChatMessage(BaseModel): role: Literal["system", "user", "assistant"] content: str class ChatCompletionsRequest(BaseModel): model: Optional[str] = None messages: list[ChatMessage] = Field(default_factory=list, min_length=1) temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) stream: bool = False async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription: res = await db.execute(select(Subscription).where(Subscription.user_id == user_id)) sub = res.scalar_one_or_none() if sub: return sub sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily") db.add(sub) await db.commit() await db.refresh(sub) return sub def _require_trader_or_higher(sub: Subscription) -> None: if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON): raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.", ) @router.post("/chat/completions") async def llm_chat_completions( req: ChatCompletionsRequest, current_user: CurrentUser, db: Database, ): """ Proxy Chat Completions to internal Ollama gateway. Returns OpenAI-ish JSON or SSE when stream=true. """ sub = await _get_or_create_subscription(db, current_user.id) _require_trader_or_higher(sub) payload: dict[str, Any] = { "model": (req.model or settings.llm_default_model), "messages": [m.model_dump() for m in req.messages], "temperature": req.temperature, "stream": bool(req.stream), } try: if req.stream: return StreamingResponse( chat_completions_stream(payload), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}, ) data = await chat_completions(payload) return JSONResponse(data) except LLMGatewayError as e: raise HTTPException(status_code=502, detail=str(e))