pounce/backend/app/api/llm_agent.py
"""
Hunter Companion API Endpoint
This is the main endpoint for the Hunter Companion chat.
Uses code-first architecture: intent detection via pattern matching,
tool execution, and template-based responses. LLM is NOT used for routing.
"""
from __future__ import annotations
import json
from typing import Any, Literal, Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.api.deps import get_current_user
from app.database import get_db
from app.models.subscription import Subscription
from app.models.user import User
from app.services.hunter_companion import process_message
router = APIRouter(prefix="/llm", tags=["LLM"])


def _tier_level(tier: str) -> int:
    """Map a subscription tier name to a numeric level for access gating."""
    t = (tier or "").lower()
    if t == "tycoon":
        return 3
    if t == "trader":
        return 2
    return 1
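

# Example values for the mapping above (derived from the code, not an external spec):
#   _tier_level("tycoon") -> 3
#   _tier_level("Trader") -> 2   (the comparison is case-insensitive)
#   _tier_level("scout")  -> 1   (unknown or empty tiers fall back to level 1)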


async def _get_user_tier(db: AsyncSession, user: User) -> str:
    """Look up the user's subscription tier, defaulting to "scout" if none exists."""
    res = await db.execute(select(Subscription).where(Subscription.user_id == user.id))
    sub = res.scalar_one_or_none()
    if not sub:
        return "scout"
    return sub.tier.value


class ChatMessage(BaseModel):
    role: Literal["system", "user", "assistant"]
    content: str


class AgentRequest(BaseModel):
    messages: list[ChatMessage] = Field(default_factory=list, min_length=1)
    path: str = Field(default="/terminal/hunt")
    model: Optional[str] = None
    temperature: float = Field(default=0.3, ge=0.0, le=2.0)
    stream: bool = True
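

# A representative request body for the schema above (a sketch; the message
# text is illustrative, not taken from the application):
#   {
#     "messages": [{"role": "user", "content": "show trending domains"}],
#     "path": "/terminal/hunt",
#     "stream": true
#   }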


async def _generate_sse_response(content: str):
    """Generate SSE-formatted response chunks (OpenAI-style deltas)."""
    # Split the content into fixed-size chunks to simulate token streaming.
    chunk_size = 20
    for i in range(0, len(content), chunk_size):
        chunk = content[i:i + chunk_size]
        data = {"choices": [{"delta": {"content": chunk}}]}
        yield f"data: {json.dumps(data)}\n\n".encode()
    yield b"data: [DONE]\n\n"


@router.post("/agent")
async def hunter_companion_chat(
    payload: AgentRequest,
    current_user: User = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """
    Hunter Companion chat endpoint.

    - Trader/Tycoon: full access to all features
    - Scout: blocked (403)
    """
    # Gate access by subscription tier.
    tier = await _get_user_tier(db, current_user)
    if _tier_level(tier) < 2:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Hunter Companion requires a Trader or Tycoon plan.",
        )

    # Route on the most recent user message only.
    user_messages = [m for m in payload.messages if m.role == "user"]
    if not user_messages:
        raise HTTPException(status_code=400, detail="No user message provided")
    last_message = user_messages[-1].content

    # Process the message (code-first: pattern matching, no LLM for routing).
    try:
        response = await process_message(
            db=db,
            user=current_user,
            message=last_message,
            path=payload.path,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Processing failed: {type(e).__name__}: {e}",
        ) from e

    # Return as an SSE stream (for frontend compatibility).
    if payload.stream:
        return StreamingResponse(
            _generate_sse_response(response),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
        )

    # Non-streaming response.
    return {"content": response}