Add LLM gateway proxy endpoint (Trader/Tycoon)
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
@@ -27,6 +27,7 @@ from app.api.analyze import router as analyze_router
from app.api.hunt import router as hunt_router
from app.api.cfo import router as cfo_router
from app.api.drops import router as drops_router
from app.api.llm import router as llm_router

api_router = APIRouter()

@@ -45,6 +46,7 @@ api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
api_router.include_router(drops_router, tags=["Drops - Zone Files"])
api_router.include_router(llm_router, tags=["LLM"])

# Marketplace (For Sale) - from analysis_3.md
api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])
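For orientation, a self-contained sketch of how the prefixes compose once api_router is mounted on the app; the /api/v1 mount prefix is an assumption and is not part of this diff:

# Hypothetical wiring sketch; the /api/v1 mount prefix is assumed, not shown in this commit.
from fastapi import APIRouter, FastAPI

llm_router = APIRouter(prefix="/llm", tags=["LLM"])  # mirrors backend/app/api/llm.py below

@llm_router.post("/chat/completions")
async def chat_completions_stub() -> dict:
    """Stand-in for the real proxy handler."""
    return {"ok": True}

api_router = APIRouter()
api_router.include_router(llm_router, tags=["LLM"])  # as in the hunk above: no extra prefix

app = FastAPI()
app.include_router(api_router, prefix="/api/v1")  # assumption: versioned API prefix
print([route.path for route in app.routes if "llm" in route.path])
# -> ['/api/v1/llm/chat/completions']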
backend/app/api/llm.py (new file, 93 lines)
@@ -0,0 +1,93 @@
"""
LLM API endpoints (Pounce -> Ollama Gateway).

This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""

from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream


router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to internal Ollama gateway.
    Returns OpenAI-ish JSON or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e))
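For reference, a minimal client-side sketch of calling the new endpoint. The base URL, the /api/v1 prefix, the session cookie name, and the response shape shown here are assumptions, not part of this diff:

# Hypothetical client sketch; host, route prefix, and cookie name are assumptions.
import httpx

BASE_URL = "https://pounce.example"                      # placeholder host
COOKIES = {"session": "<HttpOnly session cookie value>"}  # assumed cookie name set by Pounce auth

body = {
    "messages": [
        {"role": "system", "content": "You are a domain-investing assistant."},
        {"role": "user", "content": "Is a short numeric .io domain worth watching?"},
    ],
    "temperature": 0.7,
    "stream": False,
}

with httpx.Client(base_url=BASE_URL, cookies=COOKIES, timeout=60) as client:
    r = client.post("/api/v1/llm/chat/completions", json=body)  # prefix is an assumption
    r.raise_for_status()
    data = r.json()
    # If the gateway is OpenAI-compatible, the reply text is typically found here:
    print(data.get("choices", [{}])[0].get("message", {}).get("content"))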
@@ -116,6 +116,13 @@ class Settings(BaseSettings):
    # Moz API (SEO Data)
    moz_access_id: str = ""
    moz_secret_key: str = ""

    # =================================
    # LLM Gateway (Ollama / Mistral Nemo)
    # =================================
    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
    llm_gateway_api_key: str = ""
    llm_default_model: str = "mistral-nemo:latest"

    # ICANN CZDS (Centralized Zone Data Service)
    # For downloading gTLD zone files (.com, .net, .org, etc.)
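A minimal sketch of how the three new settings might be supplied, assuming the project's Settings class uses the usual pydantic-settings environment mapping with no env_prefix (not confirmed by this diff):

# Hypothetical environment configuration for the new settings.
# Assumes default pydantic-settings env mapping (field name -> upper-case env var).
import os

os.environ["LLM_GATEWAY_URL"] = "http://127.0.0.1:8812"  # reverse tunnel to the Ollama host
os.environ["LLM_GATEWAY_API_KEY"] = "replace-me"         # placeholder; keep real keys out of source control
os.environ["LLM_DEFAULT_MODEL"] = "mistral-nemo:latest"

from app.config import get_settings

settings = get_settings()  # if get_settings() caches, set env vars before the first call
print(settings.llm_gateway_url, settings.llm_default_model)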
backend/app/services/llm_gateway.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

import httpx

from app.config import get_settings


settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                body = await r.aread()
                raise LLMGatewayError(f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8','ignore')}")

            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
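As a usage note, a small sketch of calling these helpers directly outside the FastAPI route, assuming the gateway settings above are populated; the payload mirrors what llm.py builds:

# Hypothetical usage sketch for the service helpers above.
import asyncio

from app.services.llm_gateway import chat_completions, chat_completions_stream


async def main() -> None:
    payload = {
        "model": "mistral-nemo:latest",
        "messages": [{"role": "user", "content": "One-line tagline for a domain tool?"}],
        "temperature": 0.3,
        "stream": False,
    }
    data = await chat_completions(payload)
    print(data)

    # Streaming variant: the helper yields raw SSE bytes from the gateway,
    # so a caller that wants plain text still has to parse the "data: ..." lines.
    payload["stream"] = True
    async for chunk in chat_completions_stream(payload):
        print(chunk.decode("utf-8", "ignore"), end="")


asyncio.run(main())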