Add LLM gateway proxy endpoint (Trader/Tycoon)
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
@@ -27,6 +27,7 @@ from app.api.analyze import router as analyze_router
 from app.api.hunt import router as hunt_router
 from app.api.cfo import router as cfo_router
 from app.api.drops import router as drops_router
+from app.api.llm import router as llm_router
 
 api_router = APIRouter()
 
@@ -45,6 +46,7 @@ api_router.include_router(analyze_router, prefix="/analyze", tags=["Analyze"])
 api_router.include_router(hunt_router, prefix="/hunt", tags=["Hunt"])
 api_router.include_router(cfo_router, prefix="/cfo", tags=["CFO"])
 api_router.include_router(drops_router, tags=["Drops - Zone Files"])
+api_router.include_router(llm_router, tags=["LLM"])
 
 # Marketplace (For Sale) - from analysis_3.md
 api_router.include_router(listings_router, prefix="/listings", tags=["Marketplace - For Sale"])
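For orientation, a minimal wiring sketch of how the new route resolves. The aggregate router's module path and the mount prefix are not shown in this commit, so app.api and /api/v1 below are assumptions.

# Sketch only -- module path and mount prefix are assumptions, not shown in the diff.
from fastapi import FastAPI

from app.api import api_router  # assumed location of the aggregate router edited above

app = FastAPI()
app.include_router(api_router, prefix="/api/v1")  # assumed versioned prefix

# llm_router is declared with prefix="/llm" and exposes POST /chat/completions,
# so under this assumed mount the proxy lives at POST /api/v1/llm/chat/completions.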
backend/app/api/llm.py (new file, 93 lines)
@@ -0,0 +1,93 @@
"""
LLM API endpoints (Pounce -> Ollama Gateway).

This is intentionally a thin proxy:
- Enforces Pounce authentication (HttpOnly cookie)
- Enforces tier gating (Trader/Tycoon)
- Proxies to the internal LLM gateway (which talks to Ollama)
"""

from __future__ import annotations

from typing import Any, Literal, Optional

from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select

from app.api.deps import CurrentUser, Database
from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.services.llm_gateway import LLMGatewayError, chat_completions, chat_completions_stream


router = APIRouter(prefix="/llm", tags=["LLM"])
settings = get_settings()


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionsRequest(BaseModel):
    model: Optional[str] = None
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
    stream: bool = False


async def _get_or_create_subscription(db: Database, user_id: int) -> Subscription:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user_id))
    sub = res.scalar_one_or_none()
    if sub:
        return sub
    sub = Subscription(user_id=user_id, tier=SubscriptionTier.SCOUT, max_domains=5, check_frequency="daily")
    db.add(sub)
    await db.commit()
    await db.refresh(sub)
    return sub


def _require_trader_or_higher(sub: Subscription) -> None:
    if sub.tier not in (SubscriptionTier.TRADER, SubscriptionTier.TYCOON):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Chat is available on Trader and Tycoon plans. Upgrade to unlock.",
        )


@router.post("/chat/completions")
async def llm_chat_completions(
    req: ChatCompletionsRequest,
    current_user: CurrentUser,
    db: Database,
):
    """
    Proxy Chat Completions to internal Ollama gateway.
    Returns OpenAI-ish JSON or SSE when stream=true.
    """
    sub = await _get_or_create_subscription(db, current_user.id)
    _require_trader_or_higher(sub)

    payload: dict[str, Any] = {
        "model": (req.model or settings.llm_default_model),
        "messages": [m.model_dump() for m in req.messages],
        "temperature": req.temperature,
        "stream": bool(req.stream),
    }

    try:
        if req.stream:
            return StreamingResponse(
                chat_completions_stream(payload),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )
        data = await chat_completions(payload)
        return JSONResponse(data)
    except LLMGatewayError as e:
        raise HTTPException(status_code=502, detail=str(e))
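As a usage illustration, a hedged client-side sketch of calling the new endpoint. The host, the /api/v1 mount prefix, and the auth cookie name are not part of this commit and are placeholders here.

# Sketch only -- host, prefix, and cookie name are assumptions.
import asyncio

import httpx


async def main() -> None:
    cookies = {"access_token": "<session cookie from login>"}  # assumed cookie name
    body = {
        "messages": [{"role": "user", "content": "Give me a quick take on this domain."}],
        "stream": False,
    }
    async with httpx.AsyncClient(base_url="https://pounce.example", cookies=cookies) as client:
        r = await client.post("/api/v1/llm/chat/completions", json=body, timeout=60)
        r.raise_for_status()
        print(r.json())  # OpenAI-ish JSON proxied from the gateway


asyncio.run(main())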
@@ -117,6 +117,13 @@ class Settings(BaseSettings):
     moz_access_id: str = ""
     moz_secret_key: str = ""
+
+    # =================================
+    # LLM Gateway (Ollama / Mistral Nemo)
+    # =================================
+    llm_gateway_url: str = "http://127.0.0.1:8812"  # reverse-tunnel default on Pounce server
+    llm_gateway_api_key: str = ""
+    llm_default_model: str = "mistral-nemo:latest"
 
     # ICANN CZDS (Centralized Zone Data Service)
     # For downloading gTLD zone files (.com, .net, .org, etc.)
     # Register at: https://czds.icann.org/
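Since these are pydantic BaseSettings fields, they are normally supplied via the environment or a .env file. A minimal sketch, assuming the default field-name-to-env-var mapping with no env_prefix (the Settings model config is not shown in this hunk):

# Sketch only -- the exact env mapping depends on the Settings model config.
#
#   LLM_GATEWAY_URL=http://127.0.0.1:8812
#   LLM_GATEWAY_API_KEY=<gateway bearer token>
#   LLM_DEFAULT_MODEL=mistral-nemo:latest
#
from app.config import get_settings

settings = get_settings()
if not settings.llm_gateway_api_key:
    # Mirrors the guard in app/services/llm_gateway._auth_headers(): with no key,
    # the /llm proxy raises LLMGatewayError instead of reaching the gateway.
    print("LLM gateway not configured: set LLM_GATEWAY_API_KEY to enable the /llm proxy")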
backend/app/services/llm_gateway.py (new file, 53 lines)
@@ -0,0 +1,53 @@
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

import httpx

from app.config import get_settings


settings = get_settings()


class LLMGatewayError(RuntimeError):
    pass


def _auth_headers() -> dict[str, str]:
    key = (settings.llm_gateway_api_key or "").strip()
    if not key:
        raise LLMGatewayError("LLM gateway not configured (missing llm_gateway_api_key)")
    return {"Authorization": f"Bearer {key}"}


async def chat_completions(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Non-streaming call to the LLM gateway (OpenAI-ish format).
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(url, headers=_auth_headers(), json=payload)
        if r.status_code >= 400:
            raise LLMGatewayError(f"LLM gateway error: {r.status_code} {r.text[:500]}")
        return r.json()


async def chat_completions_stream(payload: dict[str, Any]) -> AsyncIterator[bytes]:
    """
    Streaming call to the LLM gateway. The gateway returns SSE; we proxy bytes through.
    """
    url = settings.llm_gateway_url.rstrip("/") + "/v1/chat/completions"
    timeout = httpx.Timeout(connect=10, read=None, write=10, pool=10)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, headers=_auth_headers(), json=payload) as r:
            if r.status_code >= 400:
                body = await r.aread()
                raise LLMGatewayError(f"LLM gateway stream error: {r.status_code} {body[:500].decode('utf-8', 'ignore')}")

            async for chunk in r.aiter_bytes():
                if chunk:
                    yield chunk
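For completeness, a hedged smoke-test sketch of the service layer on its own, outside FastAPI. The model name and prompt are placeholders, and the SSE parsing is deliberately left to the caller, as the module's docstring notes.

# Sketch only -- assumes the gateway settings above are configured.
import asyncio

from app.services.llm_gateway import chat_completions, chat_completions_stream


async def main() -> None:
    payload = {
        "model": "mistral-nemo:latest",
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": True,
    }
    # The stream yields raw SSE bytes exactly as the gateway sent them;
    # parsing the "data: ..." lines is up to the consumer (the frontend, in Pounce).
    async for chunk in chat_completions_stream(payload):
        print(chunk.decode("utf-8", "ignore"), end="")

    # Non-streaming variant returns the parsed JSON document instead.
    payload["stream"] = False
    result = await chat_completions(payload)
    print(result.get("choices", [{}])[0].get("message", {}).get("content", ""))


asyncio.run(main())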