# pounce/backend/app/services/html_sanitizer.py

"""
HTML sanitization utilities.
Goal: prevent XSS when rendering stored HTML (e.g. blog posts) via dangerouslySetInnerHTML.
"""
from __future__ import annotations
import bleach
_ALLOWED_TAGS = [
"p",
"br",
"hr",
"blockquote",
"pre",
"code",
"strong",
"em",
"ul",
"ol",
"li",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"a",
]
_ALLOWED_ATTRIBUTES = {
"a": ["href", "title", "rel", "target"],
}
_ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
def sanitize_html(html: str | None) -> str:
    """Return *html* reduced to this module's allow-listed markup.

    The input is run through ``bleach.clean`` with ``_ALLOWED_TAGS``,
    ``_ALLOWED_ATTRIBUTES`` and ``_ALLOWED_PROTOCOLS``. ``strip=True`` asks
    bleach to remove disallowed tags from the output rather than leave them
    behind as escaped text.

    Args:
        html: Markup to sanitize. Falsy values (``""`` or ``None``) are
            accepted and short-circuit to an empty string.

    Returns:
        The sanitized markup, or ``""`` when *html* is falsy.
    """
    if not html:
        # Nothing to clean; this also keeps None away from bleach.
        return ""

    return bleach.clean(
        html,
        tags=_ALLOWED_TAGS,
        attributes=_ALLOWED_ATTRIBUTES,
        protocols=_ALLOWED_PROTOCOLS,
        strip=True,
    )