1
0
mirror of https://github.com/netbox-community/netbox.git synced 2024-05-10 07:54:54 +00:00

Sanitize HTML after rendering markdown

This commit is contained in:
kkthxbye-code
2022-06-17 23:16:57 +02:00
parent e6018cd38f
commit 7c79c90cd2
4 changed files with 42 additions and 13 deletions

View File

@ -125,3 +125,7 @@ tablib
# Timezone data (required by django-timezone-field on Python 3.9+)
# https://github.com/python/tzdata
tzdata
# HTML sanitizer
# https://github.com/mozilla/bleach
bleach

View File

@ -11,7 +11,7 @@ from markdown import markdown
from netbox.config import get_config
from utilities.markdown import StrikethroughExtension
from utilities.utils import foreground_color
from utilities.utils import clean_html, foreground_color
register = template.Library()
@ -144,18 +144,6 @@ def render_markdown(value):
{{ md_source_text|markdown }}
"""
schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
# Strip HTML tags
value = strip_tags(value)
# Sanitize Markdown links
pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
# Sanitize Markdown reference links
pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
# Render Markdown
html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@ -164,6 +152,11 @@ def render_markdown(value):
if html:
html = f'<div class="rendered-markdown">{html}</div>'
schemes = get_config().ALLOWED_URL_SCHEMES
# Sanitize HTML
html = clean_html(html, schemes)
return mark_safe(html)

View File

@ -4,6 +4,7 @@ from collections import OrderedDict
from decimal import Decimal
from itertools import count, groupby
import bleach
from django.core.serializers import serialize
from django.db.models import Count, OuterRef, Subquery
from django.db.models.functions import Coalesce
@ -385,3 +386,33 @@ def copy_safe_request(request):
'path': request.path,
'id': getattr(request, 'id', None), # UUID assigned by middleware
})
def clean_html(html, schemes):
    """
    Run HTML through bleach using a fixed allowlist of tags and attributes.

    `html` is the markup to sanitize; `schemes` is the collection of URI schemes
    handed to bleach as the permitted protocols. Whatever bleach.clean() returns
    is passed back to the caller as-is.
    """
    # h1..h6 appear both as permitted tags and as tags that may carry an "id".
    heading_tags = [f"h{level}" for level in range(1, 7)]

    permitted_tags = [
        "div", "pre", "code", "blockquote", "del",
        "hr", *heading_tags,
        "ul", "ol", "li", "p", "br",
        "strong", "em", "a", "b", "i", "img",
        "table", "thead", "tbody", "tr", "th", "td",
        "dl", "dt", "dd",
    ]

    # Per-tag attribute allowlist; tags without an entry get no attributes listed.
    permitted_attributes = {
        "div": ["class"],
        **{tag: ["id"] for tag in heading_tags},
        "a": ["href", "title"],
        "img": ["src", "title", "alt"],
    }

    sanitized = bleach.clean(
        html,
        tags=permitted_tags,
        attributes=permitted_attributes,
        protocols=schemes,
    )
    return sanitized

View File

@ -1,3 +1,4 @@
bleach==5.0.0
Django==4.0.4
django-cors-headers==3.12.0
django-debug-toolbar==3.2.4