diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py
index d366e4f..afba6a5 100644
--- a/backend/app/api/__init__.py
+++ b/backend/app/api/__init__.py
@@ -27,6 +27,7 @@
 from app.api.analyze import router as analyze_router
 from app.api.hunt import router as hunt_router
 from app.api.cfo import router as cfo_router
 from app.api.drops import router as drops_router
+from app.api.llm import router as llm_router
 
 api_router = APIRouter()
@@ -45,6 +46,7 @@
 api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
 api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
 api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
 api_router.include_router(drops_router, tags=["Drops - Zone Files"])
+api_router.include_router(llm_router, tags=["LLM"])
 # Marketplace (For Sale) - from analysis_3.md
 api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])
diff --git a/backend/app/api/llm.py b/backend/app/api/llm.py
new file mode 100644
index 0000000..12fad4a
--- /dev/null
+++ b/backend/app/api/llm.py
@@ -0,0 +1,93 @@
+"""
+LLM API endpoints (Pounce -> Ollama Gateway).
+
+This is intentionally a thin proxy:
+- Enforces Pounce authentication (HttpOnly cookie)
+- Enforces tier gating (Trader/Tycoon)
+- Proxies to the internal LLM gateway (which talks to Ollama)
+"""
+
+from __future__ import annotations
+
+from typing import Any, Literal, Optional
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field
+from sqlalchemy import select
+
+from app.api.deps import CurrentUser, Database
+from app.config import get_settings
+from app.models.subscription import Subscription, SubscriptionTier
+from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream
+
+
+router = APIRouter(prefix="/llm", tags=["LLM"])
+settings = get_settings()
+
+
+class ChatMessage(BaseModel):
+    role: Literal["system", "user", "assistant"]
+    content: str
+
+
+class ChatCompletionsRequest(BaseModel):
+    model: Optional[str] = None
+    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
+    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
+    stream: bool = False
+
+
+async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
+    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
+    sub = res.scalar_one_or_none()
+    if sub:
+        return sub
+    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
+    db.add(sub)
+    await db.commit()
+    await db.refresh(sub)
+    return sub
+
+
+def _require_trader_or_higher(sub: Subscription) -> None:
+    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
+        raise HTTPException(
+            status_code=status.HTTP_403_FORBIDDEN,
+            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
+        )
+
+
+@router.post("/chat/completions")
+async def llm_chat_completions(
+    req: ChatCompletionsRequest,
+    current_user: CurrentUser,
+    db: Database,
+):
+    """
+    Proxy Chat Completions to the internal Ollama gateway.
+    Returns OpenAI-ish JSON, or SSE when stream=true.
+    """
+    sub = await _get_or_create_subscription(db, current_user.id)
+    _require_trader_or_higher(sub)
+
+    payload: dict[str, Any] = {
+        "model": (req.model or settings.llm_default_model),
+        "messages": [m.model_dump() for m in req.messages],
+        "temperature": req.temperature,
+        "stream": bool(req.stream),
+    }
+
+    try:
+        if req.stream:
+            return StreamingResponse(
+                chat_completions_stream(payload),
+                media_type="text/event-stream",
+                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+            )
+        data = await chat_completions(payload)
+        return JSONResponse(data)
+    except LLMGatewayError as e:
+        raise HTTPException(status_code=502, detail=str(e))
+
+
diff --git a/backend/app/config.py b/backend/app/config.py
index a9fc3ea..e389e7a 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -116,6 +116,13 @@
     # Moz API (SEO Data)
     moz_access_id: str = ""
     moz_secret_key: str = ""
+
+    # =================================
+    # LLM Gateway (Ollama / Mistral Nemo)
+    # =================================
+    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
+    llm_gateway_api_key: str = ""
+    llm_default_model: str = "mistral-nemo:latest"
 
     # ICANN CZDS (Centralized Zone Data Service)
     # For downloading gTLD zone files (.com, .net, .org, etc.)
diff --git a/backend/app/services/llm_gateway.py b/backend/app/services/llm_gateway.py
new file mode 100644
index 0000000..2a8a3b0
--- /dev/null
+++ b/backend/app/services/llm_gateway.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import json
+from typing import Any, AsyncIterator, Optional
+
+import httpx
+
+from app.config import get_settings
+
+
+settings = get_settings()
+
+
+class LLMGatewayError(RuntimeError):
+    pass
+
+
+def _auth_headers() -> dict[str, str]:
+    key = (settings.llm_gateway_api_key or "").strip()
+    if not key:
+        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
+    return {"Authorization": f"Bearer {key}"}
+
+
+async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
+    """
+    Non-streaming call to the LLM gateway (OpenAI-ish format).
+    """
+    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
+    async with httpx.AsyncClient(timeout=60) as client:
+        r = await client.post(url, headers=_auth_headers(), json=payload)
+        if r.status_code >= 400:
+            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
+        return r.json()
+
+
+async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
+    """
+    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
+    """
+    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
+    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
+    async with httpx.AsyncClient(timeout=timeout) as client:
+        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
+            if r.status_code >= 400:
+                body = await r.aread()
+                raise LLMGatewayError(f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8', 'ignore')}")
+
+            async for chunk in r.aiter_bytes():
+                if chunk:
+                    yield chunk
+
+