# pounce/backend/app/services/llm_agent.py

from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.config import get_settings
from app.models.subscription import Subscription
from app.models.user import User
from app.services.llm_gateway import chat_completions, chat_completions_stream
from app.services.llm_tools import execute_tool, tool_catalog_for_prompt

settings = get_settings()


def _is_greeting(text: str) -> bool:
    """Return True if the message is a bare greeting with no actionable request."""
    t = (text or "").strip().lower()
    if not t:
        return False
    # Common minimal greetings.
    greetings = {
        "hi",
        "hello",
        "hey",
        "yo",
        "sup",
        "hola",
        "hallo",
        "guten tag",
        "good morning",
        "good evening",
        "good afternoon",
    }
    if t in greetings:
        return True
    # Very short greeting-like messages, ignoring trailing punctuation.
    if len(t) <= 6 and t.replace("!", "").replace(".", "") in greetings:
        return True
    return False
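
# e.g. (illustrative): _is_greeting("Hey!") -> True; _is_greeting("value of example.com?") -> False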


def _tier_level(tier: str) -> int:
    t = (tier or "").lower()
    if t == "tycoon":
        return 3
    if t == "trader":
        return 2
    return 1


async def _get_user_tier(db: AsyncSession, user: User) -> str:
    res = await db.execute(select(Subscription).where(Subscription.user_id == user.id))
    sub = res.scalar_one_or_none()
    if not sub:
        return "scout"
    return sub.tier.value


def _build_system_prompt(path: str) -> str:
    tools = tool_catalog_for_prompt(path)
    return (
        "You are the Pounce Hunter Companion, a domain trading expert. Always respond in English.\n\n"
        "CRITICAL RULES:\n"
        "1. NEVER invent or hallucinate data. You do NOT have access to SEMrush, Estibot, GoDaddy sales, or external databases.\n"
        "2. If you don't have data, say so honestly. Only use data from tools you actually called.\n"
        "3. Keep responses SHORT: 2-3 sentences max, then bullets if needed.\n"
        "4. NO markdown: no ** or *, no code blocks, no headers with #.\n"
        "5. Use dashes (-) for bullet points.\n\n"
        "WHAT YOU CAN DO:\n"
        "- Analyze domains using the analyze_domain tool (gives Pounce Score, risk, value estimate)\n"
        "- Show user's watchlist, portfolio, listings, inbox, yield data\n"
        "- Search auctions and drops\n"
        "- Generate brandable names\n\n"
        "WHAT YOU CANNOT DO:\n"
        "- Access external sales databases or SEO tools\n"
        "- Look up real-time WHOIS or DNS (unless via tool)\n"
        "- Make up sales history or traffic stats\n\n"
        "TOOL USAGE:\n"
        "- To call a tool, respond with ONLY: {\"tool_calls\":[{\"name\":\"...\",\"args\":{...}}]}\n"
        "- After tool results, summarize briefly without mentioning tools.\n\n"
        f"TOOLS:\n{json.dumps(tools, ensure_ascii=False)}\n"
    )


def _try_parse_tool_calls(text: str) -> Optional[list[dict[str, Any]]]:
    """Parse a model reply that is a bare JSON tool-call request; None otherwise."""
    t = (text or "").strip()
    if not (t.startswith("{") and "tool_calls" in t):
        return None
    try:
        obj = json.loads(t)
    except Exception:
        return None
    calls = obj.get("tool_calls")
    if not isinstance(calls, list):
        return None
    out: list[dict[str, Any]] = []
    for c in calls:
        if not isinstance(c, dict):
            continue
        name = c.get("name")
        args = c.get("args") or {}
        if isinstance(name, str) and isinstance(args, dict):
            out.append({"name": name, "args": args})
    return out or None
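
# Accepted shape (illustrative; "analyze_domain" is a tool the system prompt
# advertises, while the "domain" argument name is an assumption):
#   {"tool_calls": [{"name": "analyze_domain", "args": {"domain": "example.com"}}]}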


def _truncate_json(value: Any, max_chars: int = 8000) -> str:
    s = json.dumps(value, ensure_ascii=False)
    if len(s) <= max_chars:
        return s
    return s[: max_chars - 3] + "..."


async def run_agent(
    db: AsyncSession,
    user: User,
    *,
    messages: list[dict[str, Any]],
    path: str,
    model: Optional[str] = None,
    temperature: float = 0.7,
    max_steps: int = 6,
) -> list[dict[str, Any]]:
    """
    Runs a small tool loop to augment context, returning final messages to be used
    for the final answer generation (optionally streamed).
    """
    tier = await _get_user_tier(db, user)
    if _tier_level(tier) < 2:
        raise PermissionError("Chat is available on Trader and Tycoon plans. Upgrade to unlock.")

    base = [
        {"role": "system", "content": _build_system_prompt(path)},
        {"role": "system", "content": f"Context: current_terminal_path={path}; tier={tier}."},
    ]
    convo = base + (messages or [])

    # If the user just greets, answer naturally without tool-looping.
    last_user = next((m for m in reversed(messages or []) if m.get("role") == "user"), None)
    if last_user and _is_greeting(str(last_user.get("content") or "")):
        convo.append(
            {
                "role": "assistant",
                "content": (
                    "Hey! What can I help you with?\n\n"
                    "Give me a domain to analyze, or ask about your watchlist, portfolio, or current auctions."
                ),
            }
        )
        return convo

    for _ in range(max_steps):
        payload = {
            "model": model or settings.llm_default_model,
            "messages": convo,
            "temperature": temperature,
            "stream": False,
        }
        res = await chat_completions(payload)
        content = (res.get("choices") or [{}])[0].get("message", {}).get("content", "") or ""
        tool_calls = _try_parse_tool_calls(content)
        if not tool_calls:
            # Plain answer: append it and stop.
            convo.append({"role": "assistant", "content": content})
            return convo
        # Append the tool request as an assistant message (so the model can see its own plan).
        convo.append({"role": "assistant", "content": content})
        for call in tool_calls[:5]:  # cap tool calls per step
            name = call["name"]
            args = call["args"]
            result = await execute_tool(db, user, name, args, path=path)
            convo.append(
                {
                    "role": "system",
                    "content": (
                        f"TOOL_RESULT_INTERNAL name={name} json={_truncate_json(result)}. "
                        "This is internal context. Do NOT quote or display this to the user."
                    ),
                }
            )

    # Fallback: force a final answer even if the tool loop didn't converge.
    convo.append(
        {
            "role": "system",
            "content": "Now answer the user with the best possible answer using the tool results. Do NOT request tools.",
        }
    )
    return convo


async def stream_final_answer(
    convo: list[dict[str, Any]], *, model: Optional[str], temperature: float
) -> AsyncIterator[bytes]:
    payload = {
        "model": model or settings.llm_default_model,
        "messages": convo
        + [
            {
                "role": "system",
                "content": (
                    "Respond now. Rules:\n"
                    "- NEVER invent data. Only use data from tools you called.\n"
                    "- Keep it SHORT: 2-3 sentences, then bullet points if needed.\n"
                    "- NO markdown (no ** or *), just plain text with dashes for bullets.\n"
                    "- Do NOT mention tools or JSON."
                ),
            }
        ],
        "temperature": temperature,
        "stream": True,
    }
    async for chunk in chat_completions_stream(payload):
        yield chunk
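

# Illustrative wiring (hypothetical, not part of this module): a FastAPI route
# could run the tool loop and then stream the final answer. The router, the
# get_db/get_current_user dependencies, and the route path are assumptions made
# for this sketch only.
#
#   from fastapi import APIRouter, Depends
#   from fastapi.responses import StreamingResponse
#
#   router = APIRouter()
#
#   @router.post("/hunter/chat")
#   async def hunter_chat(
#       body: dict[str, Any],
#       db: AsyncSession = Depends(get_db),
#       user: User = Depends(get_current_user),
#   ) -> StreamingResponse:
#       convo = await run_agent(db, user, messages=body["messages"], path=body.get("path", "/"))
#       return StreamingResponse(
#           stream_final_answer(convo, model=body.get("model"), temperature=0.7),
#           media_type="text/event-stream",
#       )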