mirror of
https://github.com/netbox-community/netbox.git
synced 2024-05-10 07:54:54 +00:00
Sanitize HTML after rendering markdown
This commit is contained in:
@ -125,3 +125,7 @@ tablib
|
||||
# Timezone data (required by django-timezone-field on Python 3.9+)
|
||||
# https://github.com/python/tzdata
|
||||
tzdata
|
||||
|
||||
# HTML sanitizer
|
||||
# https://github.com/mozilla/bleach
|
||||
bleach
|
@ -11,7 +11,7 @@ from markdown import markdown
|
||||
|
||||
from netbox.config import get_config
|
||||
from utilities.markdown import StrikethroughExtension
|
||||
from utilities.utils import foreground_color
|
||||
from utilities.utils import clean_html, foreground_color
|
||||
|
||||
register = template.Library()
|
||||
|
||||
@ -144,18 +144,6 @@ def render_markdown(value):
|
||||
|
||||
{{ md_source_text|markdown }}
|
||||
"""
|
||||
schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
|
||||
|
||||
# Strip HTML tags
|
||||
value = strip_tags(value)
|
||||
|
||||
# Sanitize Markdown links
|
||||
pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
|
||||
value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
|
||||
|
||||
# Sanitize Markdown reference links
|
||||
pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
|
||||
value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
|
||||
|
||||
# Render Markdown
|
||||
html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
|
||||
@ -164,6 +152,11 @@ def render_markdown(value):
|
||||
if html:
|
||||
html = f'<div class="rendered-markdown">{html}</div>'
|
||||
|
||||
schemes = get_config().ALLOWED_URL_SCHEMES
|
||||
|
||||
# Sanitize HTML
|
||||
html = clean_html(html, schemes)
|
||||
|
||||
return mark_safe(html)
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@ from collections import OrderedDict
|
||||
from decimal import Decimal
|
||||
from itertools import count, groupby
|
||||
|
||||
import bleach
|
||||
from django.core.serializers import serialize
|
||||
from django.db.models import Count, OuterRef, Subquery
|
||||
from django.db.models.functions import Coalesce
|
||||
@ -385,3 +386,33 @@ def copy_safe_request(request):
|
||||
'path': request.path,
|
||||
'id': getattr(request, 'id', None), # UUID assigned by middleware
|
||||
})
|
||||
|
||||
|
||||
def clean_html(html, schemes):
|
||||
"""
|
||||
Sanitizes HTML based on a whitelist of allowed tags and attributes.
|
||||
Also takes a list of allowed URI schemes.
|
||||
"""
|
||||
|
||||
ALLOWED_TAGS = [
|
||||
"div", "pre", "code", "blockquote", "del",
|
||||
"hr", "h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"ul", "ol", "li", "p", "br",
|
||||
"strong", "em", "a", "b", "i", "img",
|
||||
"table", "thead", "tbody", "tr", "th", "td",
|
||||
"dl", "dt", "dd",
|
||||
]
|
||||
|
||||
ALLOWED_ATTRIBUTES = {
|
||||
"div": ['class'],
|
||||
"h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"],
|
||||
"a": ["href", "title"],
|
||||
"img": ["src", "title", "alt"],
|
||||
}
|
||||
|
||||
return bleach.clean(
|
||||
html,
|
||||
tags=ALLOWED_TAGS,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
protocols=schemes
|
||||
)
|
||||
|
@ -1,3 +1,4 @@
|
||||
bleach==5.0.0
|
||||
Django==4.0.4
|
||||
django-cors-headers==3.12.0
|
||||
django-debug-toolbar==3.2.4
|
||||
|
Reference in New Issue
Block a user