diff --git a/base_requirements.txt b/base_requirements.txt
index 6bb537a6a..68fca9851 100644
--- a/base_requirements.txt
+++ b/base_requirements.txt
@@ -125,3 +125,7 @@ tablib
# Timezone data (required by django-timezone-field on Python 3.9+)
# https://github.com/python/tzdata
tzdata
+
+# HTML sanitizer
+# https://github.com/mozilla/bleach
+bleach
\ No newline at end of file
diff --git a/netbox/utilities/templatetags/builtins/filters.py b/netbox/utilities/templatetags/builtins/filters.py
index 44ad5ac47..738dc0e00 100644
--- a/netbox/utilities/templatetags/builtins/filters.py
+++ b/netbox/utilities/templatetags/builtins/filters.py
@@ -11,7 +11,7 @@ from markdown import markdown
from netbox.config import get_config
from utilities.markdown import StrikethroughExtension
-from utilities.utils import foreground_color
+from utilities.utils import clean_html, foreground_color
register = template.Library()
@@ -144,18 +144,6 @@ def render_markdown(value):
{{ md_source_text|markdown }}
"""
- schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
-
- # Strip HTML tags
- value = strip_tags(value)
-
- # Sanitize Markdown links
- pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
- value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
-
- # Sanitize Markdown reference links
- pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
- value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
# Render Markdown
html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@@ -164,6 +152,11 @@ def render_markdown(value):
if html:
html = f'
{html}
'
+ schemes = get_config().ALLOWED_URL_SCHEMES
+
+ # Sanitize HTML
+ html = clean_html(html, schemes)
+
return mark_safe(html)
diff --git a/netbox/utilities/utils.py b/netbox/utilities/utils.py
index bc6d928ed..2b939471c 100644
--- a/netbox/utilities/utils.py
+++ b/netbox/utilities/utils.py
@@ -4,6 +4,7 @@ from collections import OrderedDict
from decimal import Decimal
from itertools import count, groupby
+import bleach
from django.core.serializers import serialize
from django.db.models import Count, OuterRef, Subquery
from django.db.models.functions import Coalesce
@@ -385,3 +386,33 @@ def copy_safe_request(request):
'path': request.path,
'id': getattr(request, 'id', None), # UUID assigned by middleware
})
+
+
+def clean_html(html, schemes):
+    """
+    Run the given HTML through bleach, keeping only whitelisted tags and
+    attributes. `schemes` is the list of URI schemes passed as protocols.
+    """
+    # Tags handed to bleach as the allowed set.
+    allowed_tags = [
+        "div", "pre", "code", "blockquote", "del", "hr",
+        "h1", "h2", "h3", "h4", "h5", "h6",
+        "ul", "ol", "li", "p", "br", "strong", "em", "a", "b", "i", "img",
+        "table", "thead", "tbody", "tr", "th", "td",
+        "dl", "dt", "dd",
+    ]
+
+    # Attributes handed to bleach as allowed, keyed by tag name.
+    allowed_attributes = {
+        "div": ['class'],
+        **{f"h{level}": ["id"] for level in range(1, 7)},
+        "a": ["href", "title"],
+        "img": ["src", "title", "alt"],
+    }
+
+    return bleach.clean(
+        html,
+        protocols=schemes,
+        tags=allowed_tags,
+        attributes=allowed_attributes,
+    )
diff --git a/requirements.txt b/requirements.txt
index 293a33542..dbe7d70c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+bleach==5.0.0
Django==4.0.4
django-cors-headers==3.12.0
django-debug-toolbar==3.2.4