52 lines
814 B
Python
52 lines
814 B
Python
"""
|
|
HTML sanitization utilities.
|
|
|
|
Goal: prevent XSS when rendering stored HTML (e.g. blog posts) via dangerouslySetInnerHTML.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import bleach
|
|
|
|
# Element names permitted in sanitized output. sanitize_html() hands this list
# to bleach.clean() as `tags`; nothing outside it is allowed through as markup.
_ALLOWED_TAGS = [
    # Paragraph-level structure
    "p",
    "br",
    "hr",
    "blockquote",
    # Preformatted text and inline code
    "pre",
    "code",
    # Inline emphasis
    "strong",
    "em",
    # Lists
    "ul",
    "ol",
    "li",
    # Headings
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    # Hyperlinks (their attributes are restricted by _ALLOWED_ATTRIBUTES)
    "a",
]

# Per-tag attribute allowlist; only <a> may carry attributes at all.
# NOTE(review): "target" is accepted but "rel" is author-controlled, so a link
# can request target="_blank" without rel="noopener" -- confirm this is
# acceptable for the rendering client.
_ALLOWED_ATTRIBUTES = {
    "a": ["href", "title", "rel", "target"],
}

# URL schemes accepted in link values; passed to bleach.clean() as `protocols`.
_ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
|
|
|
|
|
|
def sanitize_html(html: str) -> str:
    """Sanitize potentially unsafe HTML input.

    The markup is passed through ``bleach.clean`` using the module-level
    allowlists ``_ALLOWED_TAGS``, ``_ALLOWED_ATTRIBUTES`` and
    ``_ALLOWED_PROTOCOLS``.

    Args:
        html: Untrusted HTML markup. Any falsy value (empty string, ``None``)
            is treated as "no content".

    Returns:
        The cleaned markup, or ``""`` when *html* is falsy.
    """
    # Short-circuit on empty/None input so bleach is never called with a
    # non-string value.
    if not html:
        return ""

    return bleach.clean(
        html,
        tags=_ALLOWED_TAGS,
        attributes=_ALLOWED_ATTRIBUTES,
        protocols=_ALLOWED_PROTOCOLS,
        # strip=True asks bleach to drop disallowed markup rather than
        # escape it into visible text.
        strip=True,
    )
|
|
|
|
|