From 8254e707b6ba5a3069ebaf70cf3bc5bacaa5fb16 Mon Sep 17 00:00:00 2001 From: Arthur Hanson Date: Thu, 11 Jan 2024 06:31:32 -0800 Subject: [PATCH] 12851 replace bleach with nh3 (#14767) * 12851 replace bleach with nh3 * Move tags & attributes lists to constants.py --------- Co-authored-by: Jeremy Stretch --- base_requirements.txt | 8 ++++---- netbox/utilities/constants.py | 24 ++++++++++++++++++++++++ netbox/utilities/utils.py | 30 ++++++------------------------ requirements.txt | 2 +- 4 files changed, 35 insertions(+), 29 deletions(-) diff --git a/base_requirements.txt b/base_requirements.txt index 0c7e54b13..87a3066c4 100644 --- a/base_requirements.txt +++ b/base_requirements.txt @@ -1,7 +1,3 @@ -# HTML sanitizer -# https://github.com/mozilla/bleach/blob/main/CHANGES -bleach - # The Python web framework on which NetBox is built # https://docs.djangoproject.com/en/stable/releases/ Django<5.1 @@ -108,6 +104,10 @@ mkdocstrings[python-legacy] # https://github.com/netaddr/netaddr/blob/master/CHANGELOG netaddr +# Python bindings to the ammonia HTML sanitization library. +# https://github.com/messense/nh3 +nh3 + # Fork of PIL (Python Imaging Library) for image processing # https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst Pillow diff --git a/netbox/utilities/constants.py b/netbox/utilities/constants.py index 345894065..c7c26f6b3 100644 --- a/netbox/utilities/constants.py +++ b/netbox/utilities/constants.py @@ -69,3 +69,27 @@ CSV_DELIMITERS = { 'semicolon': ';', 'tab': '\t', } + + +# +# HTML allowed tags & attributes +# + +HTML_ALLOWED_TAGS = { + "a", "b", "blockquote", "br", "code", "dd", "del", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", + "hr", "i", "img", "li", "ol", "p", "pre", "strong", "table", "tbody", "td", "th", "thead", "tr", "ul" +} + +HTML_ALLOWED_ATTRIBUTES = { + "a": {"href", "title"}, + "div": {"class"}, + "h1": {"id"}, + "h2": {"id"}, + "h3": {"id"}, + "h4": {"id"}, + "h5": {"id"}, + "h6": {"id"}, + "img": {"alt", "src", "title"}, + "td": {"align"}, + "th": {"align"}, +} diff --git a/netbox/utilities/utils.py b/netbox/utilities/utils.py index f3f8c7c50..ca05c35bb 100644 --- a/netbox/utilities/utils.py +++ b/netbox/utilities/utils.py @@ -1,11 +1,11 @@ import datetime import decimal import json +import nh3 import re from decimal import Decimal from itertools import count, groupby -import bleach from django.contrib.contenttypes.models import ContentType from django.core import serializers from django.db.models import Count, ManyToOneRel, OuterRef, Subquery @@ -24,6 +24,7 @@ from netbox.config import get_config from netbox.plugins import PluginConfig from urllib.parse import urlencode from utilities.constants import HTTP_REQUEST_META_SAFE_COPY +from .constants import HTML_ALLOWED_ATTRIBUTES, HTML_ALLOWED_TAGS def title(value): @@ -511,30 +512,11 @@ def clean_html(html, schemes): Sanitizes HTML based on a whitelist of allowed tags and attributes. Also takes a list of allowed URI schemes. """ - - ALLOWED_TAGS = { - "div", "pre", "code", "blockquote", "del", - "hr", "h1", "h2", "h3", "h4", "h5", "h6", - "ul", "ol", "li", "p", "br", - "strong", "em", "a", "b", "i", "img", - "table", "thead", "tbody", "tr", "th", "td", - "dl", "dt", "dd", - } - - ALLOWED_ATTRIBUTES = { - "div": ['class'], - "h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"], - "a": ["href", "title"], - "img": ["src", "title", "alt"], - "th": ["align"], - "td": ["align"], - } - - return bleach.clean( + return nh3.clean( html, - tags=ALLOWED_TAGS, - attributes=ALLOWED_ATTRIBUTES, - protocols=schemes + tags=HTML_ALLOWED_TAGS, + attributes=HTML_ALLOWED_ATTRIBUTES, + url_schemes=set(schemes) ) diff --git a/requirements.txt b/requirements.txt index 788a22f9c..cc733d4b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -bleach==6.1.0 Django==5.0.1 django-cors-headers==4.3.1 django-debug-toolbar==4.2.0 @@ -24,6 +23,7 @@ Markdown==3.5.1 mkdocs-material==9.5.3 mkdocstrings[python-legacy]==0.24.0 netaddr==0.9.0 +nh3==0.2.15 Pillow==10.1.0 psycopg[binary,pool]==3.1.16 PyYAML==6.0.1