1
0
mirror of https://github.com/netbox-community/netbox.git synced 2024-05-10 07:54:54 +00:00

Sanitize HTML after rendering markdown

This commit is contained in:
kkthxbye-code
2022-06-17 23:16:57 +02:00
parent e6018cd38f
commit 7c79c90cd2
4 changed files with 42 additions and 13 deletions

View File

@ -125,3 +125,7 @@ tablib
# Timezone data (required by django-timezone-field on Python 3.9+) # Timezone data (required by django-timezone-field on Python 3.9+)
# https://github.com/python/tzdata # https://github.com/python/tzdata
tzdata tzdata
# HTML sanitizer
# https://github.com/mozilla/bleach
bleach

View File

@ -11,7 +11,7 @@ from markdown import markdown
from netbox.config import get_config from netbox.config import get_config
from utilities.markdown import StrikethroughExtension from utilities.markdown import StrikethroughExtension
from utilities.utils import foreground_color from utilities.utils import clean_html, foreground_color
register = template.Library() register = template.Library()
@ -144,18 +144,6 @@ def render_markdown(value):
{{ md_source_text|markdown }} {{ md_source_text|markdown }}
""" """
schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
# Strip HTML tags
value = strip_tags(value)
# Sanitize Markdown links
pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
# Sanitize Markdown reference links
pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
# Render Markdown # Render Markdown
html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()]) html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@ -164,6 +152,11 @@ def render_markdown(value):
if html: if html:
html = f'<div class="rendered-markdown">{html}</div>' html = f'<div class="rendered-markdown">{html}</div>'
schemes = get_config().ALLOWED_URL_SCHEMES
# Sanitize HTML
html = clean_html(html, schemes)
return mark_safe(html) return mark_safe(html)

View File

@ -4,6 +4,7 @@ from collections import OrderedDict
from decimal import Decimal from decimal import Decimal
from itertools import count, groupby from itertools import count, groupby
import bleach
from django.core.serializers import serialize from django.core.serializers import serialize
from django.db.models import Count, OuterRef, Subquery from django.db.models import Count, OuterRef, Subquery
from django.db.models.functions import Coalesce from django.db.models.functions import Coalesce
@ -385,3 +386,33 @@ def copy_safe_request(request):
'path': request.path, 'path': request.path,
'id': getattr(request, 'id', None), # UUID assigned by middleware 'id': getattr(request, 'id', None), # UUID assigned by middleware
}) })
def clean_html(html, schemes):
    """
    Sanitize an HTML fragment against a fixed allow-list of tags and attributes.

    The allow-list is passed to ``bleach.clean`` together with the caller's
    permitted URI schemes.

    :param html: The HTML markup to sanitize.
    :param schemes: The URI schemes to allow, passed to bleach as ``protocols``.
    :return: The result of ``bleach.clean`` for the given markup.
    """
    # Elements that are permitted in the sanitized output.
    permitted_tags = [
        "div", "pre", "code", "blockquote", "del",
        "hr", "h1", "h2", "h3", "h4", "h5", "h6",
        "ul", "ol", "li", "p", "br",
        "strong", "em", "a", "b", "i", "img",
        "table", "thead", "tbody", "tr", "th", "td",
        "dl", "dt", "dd",
    ]

    # Per-element attribute allow-list; headings h1..h6 may only carry an "id".
    permitted_attributes = {f"h{level}": ["id"] for level in range(1, 7)}
    permitted_attributes["div"] = ['class']
    permitted_attributes["a"] = ["href", "title"]
    permitted_attributes["img"] = ["src", "title", "alt"]

    sanitized = bleach.clean(
        html,
        tags=permitted_tags,
        attributes=permitted_attributes,
        protocols=schemes,
    )
    return sanitized

View File

@ -1,3 +1,4 @@
bleach==5.0.0
Django==4.0.4 Django==4.0.4
django-cors-headers==3.12.0 django-cors-headers==3.12.0
django-debug-toolbar==3.2.4 django-debug-toolbar==3.2.4