Yves Gugger 35877dd329
fix: stabilize HUNT on production
Fix brandables API NameError, switch Trend Surfer to a working Google Trends RSS endpoint, and harden the HUNT UI against failed requests. Also add sharp for Next.js standalone image optimization and remove PostHog script crossOrigin to reduce CORS breakage.
2025-12-15 16:54:39 +01:00


"""
Trend Surfer: fetch trending search queries via public RSS (no API key).
Note: This still performs an external HTTP request to Google Trends RSS.
It's not a paid API and uses public endpoints.
"""
from __future__ import annotations

from datetime import datetime, timezone
from typing import Optional
from xml.etree import ElementTree as ET

import httpx


async def fetch_google_trends_daily_rss(geo: str = "US", *, timeout_seconds: float = 10.0) -> list[dict]:
    geo = (geo or "US").upper().strip()
    # Google has changed/retired older RSS paths (e.g. /trends/trendingsearches/daily/rss).
    # This endpoint is currently the stable public feed for daily search trends.
    url = f"https://trends.google.com/trending/rss?geo={geo}"
    headers = {
        # Use a browser-like UA; Google returns 404 for some automated clients otherwise.
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
        "Accept": "application/rss+xml, application/xml;q=0.9, text/xml;q=0.8, */*;q=0.5",
    }
    async with httpx.AsyncClient(timeout=timeout_seconds, follow_redirects=True, headers=headers) as client:
        res = await client.get(url)
        res.raise_for_status()
        xml = res.text

    root = ET.fromstring(xml)
    items: list[dict] = []
    for item in root.findall(".//item"):
        title = item.findtext("title") or ""
        link = item.findtext("link")
        pub = item.findtext("pubDate")
        # The traffic element is namespaced (ht:approx_traffic); the {*} wildcard
        # matches any namespace and requires Python 3.8+.
        approx = item.findtext("{*}approx_traffic")
        published_at: Optional[datetime] = None
        if pub:
            try:
                # Example: "Sat, 14 Dec 2025 00:00:00 +0000"
                published_at = datetime.strptime(pub, "%a, %d %b %Y %H:%M:%S %z").astimezone(timezone.utc)
            except ValueError:
                published_at = None
        if title.strip():
            items.append(
                {
                    "title": title.strip(),
                    "approx_traffic": approx.strip() if approx else None,
                    "published_at": published_at,
                    "link": link,
                }
            )
    return items
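As a sketch of the parsing step above in isolation (no network call), here is the same item-extraction logic applied to a minimal hand-written feed. The sample XML below is invented for illustration; the real feed's structure may differ in detail, but the `title`/`pubDate`/`ht:approx_traffic` elements match what the function expects.

```python
from datetime import datetime, timezone
from xml.etree import ElementTree as ET

# Invented sample feed mimicking the Google Trends RSS shape.
SAMPLE_FEED = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:ht="https://trends.google.com/trending/rss">
  <channel>
    <item>
      <title>example query</title>
      <link>https://trends.google.com/trending?geo=US</link>
      <pubDate>Sat, 14 Dec 2025 00:00:00 +0000</pubDate>
      <ht:approx_traffic>200,000+</ht:approx_traffic>
    </item>
  </channel>
</rss>"""

root = ET.fromstring(SAMPLE_FEED)
items = []
for item in root.findall(".//item"):
    title = (item.findtext("title") or "").strip()
    approx = item.findtext("{*}approx_traffic")  # wildcard namespace match (Python 3.8+)
    pub = item.findtext("pubDate")
    published_at = None
    if pub:
        # RFC 822-style date with numeric offset, normalized to UTC.
        published_at = datetime.strptime(pub, "%a, %d %b %Y %H:%M:%S %z").astimezone(timezone.utc)
    if title:
        items.append({"title": title, "approx_traffic": approx, "published_at": published_at})

print(items[0]["title"])           # example query
print(items[0]["approx_traffic"])  # 200,000+
```

This also shows why the browser-like headers matter only for the fetch, not the parse: once the XML is in hand, the extraction is plain `ElementTree` path queries.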