diff --git a/base_requirements.txt b/base_requirements.txt
index 6bb537a6a..68fca9851 100644
--- a/base_requirements.txt
+++ b/base_requirements.txt
@@ -125,3 +125,7 @@ tablib
 # Timezone data (required by django-timezone-field on Python 3.9+)
 # https://github.com/python/tzdata
 tzdata
+
+# HTML sanitizer
+# https://github.com/mozilla/bleach
+bleach
diff --git a/netbox/utilities/templatetags/builtins/filters.py b/netbox/utilities/templatetags/builtins/filters.py
index 44ad5ac47..738dc0e00 100644
--- a/netbox/utilities/templatetags/builtins/filters.py
+++ b/netbox/utilities/templatetags/builtins/filters.py
@@ -11,7 +11,7 @@ from markdown import markdown
 
 from netbox.config import get_config
 from utilities.markdown import StrikethroughExtension
-from utilities.utils import foreground_color
+from utilities.utils import clean_html, foreground_color
 
 register = template.Library()
 
@@ -144,18 +144,6 @@ def render_markdown(value):
 
         {{ md_source_text|markdown }}
     """
-    schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
-
-    # Strip HTML tags
-    value = strip_tags(value)
-
-    # Sanitize Markdown links
-    pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
-    value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
-
-    # Sanitize Markdown reference links
-    pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
-    value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
 
     # Render Markdown
     html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@@ -164,6 +152,11 @@ def render_markdown(value):
     if html:
         html = f'<div class="rendered-markdown">{html}</div>'
 
+    schemes = get_config().ALLOWED_URL_SCHEMES
+
+    # Sanitize HTML
+    html = clean_html(html, schemes)
+
     return mark_safe(html)
 
 
diff --git a/netbox/utilities/utils.py b/netbox/utilities/utils.py
index bc6d928ed..2b939471c 100644
--- a/netbox/utilities/utils.py
+++ b/netbox/utilities/utils.py
@@ -4,6 +4,7 @@ from collections import OrderedDict
 from decimal import Decimal
 from itertools import count, groupby
 
+import bleach
 from django.core.serializers import serialize
 from django.db.models import Count, OuterRef, Subquery
 from django.db.models.functions import Coalesce
@@ -385,3 +386,38 @@ def copy_safe_request(request):
         'path': request.path,
         'id': getattr(request, 'id', None),  # UUID assigned by middleware
     })
+
+
+def clean_html(html, schemes):
+    """
+    Sanitize an HTML fragment against a whitelist of allowed tags and attributes.
+
+    Markup outside the whitelist is escaped rather than removed (bleach's default, as `strip` is not set),
+    and URL-bearing attributes are limited to the given schemes.
+
+    html: the HTML string to sanitize
+    schemes: the permitted URI schemes, passed to bleach as `protocols`
+    """
+
+    ALLOWED_TAGS = [
+        "div", "pre", "code", "blockquote", "del",
+        "hr", "h1", "h2", "h3", "h4", "h5", "h6",
+        "ul", "ol", "li", "p", "br",
+        "strong", "em", "a", "b", "i", "img",
+        "table", "thead", "tbody", "tr", "th", "td",
+        "dl", "dt", "dd",
+    ]
+
+    ALLOWED_ATTRIBUTES = {
+        "div": ['class'],
+        "h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"],
+        "a": ["href", "title"],
+        "img": ["src", "title", "alt"],
+    }
+
+    return bleach.clean(
+        html,
+        tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        protocols=schemes
+    )
diff --git a/requirements.txt b/requirements.txt
index 293a33542..dbe7d70c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+bleach==5.0.0
 Django==4.0.4
 django-cors-headers==3.12.0
 django-debug-toolbar==3.2.4