# Chat agent loop for the terminal companion: builds the system prompt,
# runs a bounded tool-calling loop against the LLM gateway, and prepares
# (or streams) the final user-facing answer.
from __future__ import annotations

import json
from typing import Any, AsyncIterator, Optional

from sqlalchemy.ext.asyncio import AsyncSession

from app.config import get_settings
from app.models.subscription import Subscription, SubscriptionTier
from app.models.user import User
from app.services.llm_gateway import chat_completions, chat_completions_stream
from app.services.llm_tools import execute_tool, tool_catalog_for_prompt

settings = get_settings()


def _is_greeting(text: str) -> bool:
|
||
t = (text or "").strip().lower()
|
||
if not t:
|
||
return False
|
||
# common minimal greetings
|
||
greetings = {
|
||
"hi",
|
||
"hello",
|
||
"hey",
|
||
"yo",
|
||
"sup",
|
||
"hola",
|
||
"hallo",
|
||
"guten tag",
|
||
"good morning",
|
||
"good evening",
|
||
"good afternoon",
|
||
}
|
||
if t in greetings:
|
||
return True
|
||
# very short greeting-like messages
|
||
if len(t) <= 6 and t.replace("!", "").replace(".", "") in greetings:
|
||
return True
|
||
return False
|
||
|
||
|
||
def _tier_level(tier: str) -> int:
|
||
t = (tier or "").lower()
|
||
if t == "tycoon":
|
||
return 3
|
||
if t == "trader":
|
||
return 2
|
||
return 1
|
||
|
||
|
||
async def _get_user_tier(db: AsyncSession, user: User) -> str:
    """Look up the user's subscription tier, defaulting to 'scout' when none exists."""
    from sqlalchemy import select

    stmt = select(Subscription).where(Subscription.user_id == user.id)
    subscription = (await db.execute(stmt)).scalar_one_or_none()
    return subscription.tier.value if subscription else "scout"


def _build_system_prompt(path: str) -> str:
    """Assemble the companion's system prompt, embedding the tool catalog for *path*."""
    tools = tool_catalog_for_prompt(path)
    sections = [
        "You are the Pounce Hunter Companion (domain trading expert). Always respond in English.\n",
        "You have access to internal tools that return live data. Use tools when needed.\n\n",
        "OUTPUT STYLE:\n",
        "- Never show raw tool output to the user.\n",
        "- Never print phrases like 'Tool Result', 'TOOL_RESULT', or code-fenced JSON.\n",
        "- If you used tools, silently incorporate the data and present ONLY a clean summary.\n",
        "- Keep formatting simple: short paragraphs and bullet points. Avoid dumping structured data.\n\n",
        "BEHAVIOR:\n",
        "- Do NOT invent user preferences, keywords, TLDs, budgets, or tasks.\n",
        "- If the user greets you or sends a minimal message (e.g., 'hi', 'hello'), respond naturally and ask what they want help with.\n",
        "- Ask 1–2 clarifying questions when the user request is ambiguous.\n\n",
        "WHEN TO USE TOOLS:\n",
        "- Use tools only when the user explicitly asks about their account data, their current page, their lists (watchlist/portfolio/listings/inbox/yield), or when they provide a specific domain to analyze.\n",
        "- Never proactively 'fetch' domains or run scans based on guessed keywords.\n\n",
        "TOOL CALLING PROTOCOL:\n",
        "- If you need data, respond with ONLY a JSON object:\n",
        ' {"tool_calls":[{"name":"tool_name","args":{...}}, ...]}\n',
        "- Do not include any other text when requesting tools.\n",
        "- After tools are executed, you will receive TOOL_RESULT messages.\n",
        "- When you are ready to answer the user, respond normally (not JSON) and do NOT mention tools.\n\n",
        "AVAILABLE TOOLS (JSON schemas):\n",
        f"{json.dumps(tools, ensure_ascii=False)}\n\n",
        "RULES:\n",
        "- Never claim you checked external sources unless the user provided the data.\n",
        "- Keep answers practical and decisive. If (and only if) the user is asking about a specific domain: include BUY/CONSIDER/SKIP + bullets.\n",
    ]
    return "".join(sections)


def _try_parse_tool_calls(text: str) -> Optional[list[dict[str, Any]]]:
|
||
t = (text or "").strip()
|
||
if not (t.startswith("{") and "tool_calls" in t):
|
||
return None
|
||
try:
|
||
obj = json.loads(t)
|
||
except Exception:
|
||
return None
|
||
calls = obj.get("tool_calls")
|
||
if not isinstance(calls, list):
|
||
return None
|
||
out: list[dict[str, Any]] = []
|
||
for c in calls:
|
||
if not isinstance(c, dict):
|
||
continue
|
||
name = c.get("name")
|
||
args = c.get("args") or {}
|
||
if isinstance(name, str) and isinstance(args, dict):
|
||
out.append({"name": name, "args": args})
|
||
return out or None
|
||
|
||
|
||
def _truncate_json(value: Any, max_chars: int = 8000) -> str:
|
||
s = json.dumps(value, ensure_ascii=False)
|
||
if len(s) <= max_chars:
|
||
return s
|
||
return s[: max_chars - 3] + "..."
|
||
|
||
|
||
async def run_agent(
    db: AsyncSession,
    user: User,
    *,
    messages: list[dict[str, Any]],
    path: str,
    model: Optional[str] = None,
    temperature: float = 0.7,
    max_steps: int = 6,
) -> list[dict[str, Any]]:
    """
    Runs a small tool loop to augment context, returning final messages to be used
    for the final answer generation (optionally streamed).

    messages: prior chat turns as {"role", "content"} dicts.
    path: current terminal path, used to scope the tool catalog and context.
    model: override for settings.llm_default_model.
    max_steps: upper bound on model round-trips before forcing a final answer.

    Raises PermissionError when the user's tier is below Trader (level 2).
    """
    tier = await _get_user_tier(db, user)
    # Access gate: the chat companion is only available on Trader/Tycoon plans.
    if _tier_level(tier) < 2:
        raise PermissionError("Chat is available on Trader and Tycoon plans. Upgrade to unlock.")

    # Two system messages: the tool-aware prompt plus lightweight session context.
    base = [
        {"role": "system", "content": _build_system_prompt(path)},
        {"role": "system", "content": f"Context: current_terminal_path={path}; tier={tier}."},
    ]
    convo = base + (messages or [])

    # If the user just greets, answer naturally without tool-looping.
    # The canned reply is appended directly, skipping any LLM round-trip.
    last_user = next((m for m in reversed(messages or []) if m.get("role") == "user"), None)
    if last_user and _is_greeting(str(last_user.get("content") or "")):
        convo.append(
            {
                "role": "assistant",
                "content": (
                    "Hey — how can I help?\n\n"
                    "If you want, tell me:\n"
                    "- which Terminal page you’re on (or what you’re trying to do)\n"
                    "- or a specific domain you’re considering\n"
                    "- or what outcome you want (find deals, assess a name, manage leads, etc.)"
                ),
            }
        )
        return convo

    # Tool loop: each iteration asks the model for either a final answer or a
    # JSON tool request (see TOOL CALLING PROTOCOL in the system prompt).
    for _ in range(max_steps):
        payload = {
            "model": model or settings.llm_default_model,
            "messages": convo,
            "temperature": temperature,
            "stream": False,
        }
        res = await chat_completions(payload)
        # Defensive extraction: tolerate missing choices/message/content keys.
        content = (res.get("choices") or [{}])[0].get("message", {}).get("content", "") or ""

        tool_calls = _try_parse_tool_calls(content)
        if not tool_calls:
            # Not a tool request — this is the final answer.
            # append assistant and stop
            convo.append({"role": "assistant", "content": content})
            return convo

        # append the tool request as assistant message (so model can see its own plan)
        convo.append({"role": "assistant", "content": content})

        for call in tool_calls[:5]:  # cap per step
            name = call["name"]
            args = call["args"]
            result = await execute_tool(db, user, name, args, path=path)
            # Tool output is injected as a system message so the model treats it
            # as internal context rather than something to echo to the user.
            convo.append(
                {
                    "role": "system",
                    "content": (
                        f"TOOL_RESULT_INTERNAL name={name} json={_truncate_json(result)}. "
                        "This is internal context. Do NOT quote or display this to the user."
                    ),
                }
            )

    # Fallback: force final answer even if tool loop didn't converge
    convo.append(
        {
            "role": "system",
            "content": "Now answer the user with the best possible answer using the tool results. Do NOT request tools.",
        }
    )
    return convo


async def stream_final_answer(convo: list[dict[str, Any]], *, model: Optional[str], temperature: float) -> AsyncIterator[bytes]:
    """Stream the model's final user-facing answer for an already-prepared conversation.

    Appends a closing system instruction that forbids further tool requests and
    raw tool output, then yields raw streaming chunks from the LLM gateway.
    """
    closing_instruction = {
        "role": "system",
        "content": (
            "Final step: respond to the user.\n"
            "- Do NOT output JSON tool_calls.\n"
            "- Do NOT request tools.\n"
            "- Do NOT include raw tool outputs, internal tags, or code-fenced JSON.\n"
            "- If you used tools, present only a clean human summary."
        ),
    }
    payload = {
        "model": model or settings.llm_default_model,
        "messages": convo + [closing_instruction],
        "temperature": temperature,
        "stream": True,
    }
    async for chunk in chat_completions_stream(payload):
        yield chunk