"""
LLM API endpoints (Pounce -> Ollama Gateway).
This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""
from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream

router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None  # falls back to settings.llm_default_model when omitted
    # Required: at least one message. A default_factory=list here would let a body
    # without "messages" bypass min_length, since Pydantic does not validate defaults.
    messages: list[ChatMessage] = Field(..., min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False
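
# Validation notes for ChatCompletionsRequest above (illustrative values): a body
# like {"messages": [{"role": "tool", "content": "x"}]} is rejected with 422
# because "tool" is outside the allowed Literal roles, and "temperature": 3.0
# fails the le=2.0 bound. Unknown top-level keys are ignored (Pydantic v2 default).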


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(
        user_id=user_id,
        tier=SubscriptionTier.SCOUT,
        max_domains=5,
        check_frequency="daily",
    )
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to the internal Ollama gateway.

    Returns OpenAI-style JSON, or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": req.model or settings.llm_default_model,
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            # Pass the gateway's SSE chunks through unchanged.
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        # Surface gateway failures as 502 rather than an unhandled 500.
        raise HTTPException(status_code=502, detail=str(e)) from e