From 72aab81d7d8bfca16fd1565d1ab9ab55ad962cfb Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Tue, 11 Nov 2025 13:39:44 +0100 Subject: [PATCH] Add ContentEncodingCheckMiddleware --- bugsink/middleware.py | 37 +++++++++++++++++++++++++++++++++++++ bugsink/settings/default.py | 1 + bugsink/tests.py | 7 +++++++ ingest/tests.py | 5 ++++- ingest/urls.py | 4 ++-- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/bugsink/middleware.py b/bugsink/middleware.py index 807f617..c7a8c9d 100644 --- a/bugsink/middleware.py +++ b/bugsink/middleware.py @@ -9,11 +9,48 @@ from django.utils.translation import get_supported_language_variant from django.utils.translation.trans_real import parse_accept_lang_header from django.utils import translation from django.urls import get_script_prefix +from django.http import HttpResponseBadRequest performance_logger = logging.getLogger("bugsink.performance.views") +class ContentEncodingCheckMiddleware: + """ + We don't just globally interpret Content-Encoding for all views since: + + 1. this increases our attack service (or forces us to reason about how it doesn't) + 2. forces us to think about the interplay of Django's POST/FILES handling and maximums (DATA_UPLOAD_MAX_MEMORY_SIZE) + and our own maximums and handling. + 3. the various maximums for reading from streaming requests are per-view (underlying data-type) anyway. + + Instead, the only global thing we do is "fail explicitly". + """ + + # NOTE: once this list becomes long, we could switch to a per-view decorator (with the maximum bytes as a value) + SUPPORTED_VIEWS = [ + "ingest-store", + "ingest-envelope", + ] + + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + return self.get_response(request) + + def process_view(self, request, view_func, view_args, view_kwargs): + if request.resolver_match: + view_name = request.resolver_match.view_name + else: + view_name = "[unknown]" + + if "HTTP_CONTENT_ENCODING" in request.META and view_name not in self.SUPPORTED_VIEWS: + return HttpResponseBadRequest(f"Content-Encoding handling is not supported for endpoint `{view_name}`") + + return None # proceed normally + + class DisallowChunkedMiddleware: def __init__(self, get_response): self.get_response = get_response diff --git a/bugsink/settings/default.py b/bugsink/settings/default.py index 4591012..60867ba 100644 --- a/bugsink/settings/default.py +++ b/bugsink/settings/default.py @@ -129,6 +129,7 @@ AUTH_USER_MODEL = "users.User" TAILWIND_APP_NAME = 'theme' MIDDLEWARE = [ + "bugsink.middleware.ContentEncodingCheckMiddleware", 'bugsink.middleware.SetRemoteAddrMiddleware', 'bugsink.middleware.DisallowChunkedMiddleware', 'django.middleware.security.SecurityMiddleware', diff --git a/bugsink/tests.py b/bugsink/tests.py index 865e7f7..7154a87 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -401,6 +401,13 @@ class SetRemoteAddrMiddlewareTestCase(RegularTestCase): SetRemoteAddrMiddleware.parse_x_forwarded_for("123.123.123.123,1.2.3.4") +class ContentEncodingCheckMiddlewareTestCase(DjangoTestCase): + + def test_speak_brotli_with_arbitrary_view_fails(self): + response = self.client.post("/", headers={"Content-Encoding": "br"}) + self.assertTrue(b"Content-Encoding handling is not supported for endpoint `home`" in response.content) + + class AllowedHostsMsgTestCase(DjangoTestCase): def test_allowed_hosts_error_message(self): diff --git a/ingest/tests.py b/ingest/tests.py index b245ad1..1719297 100644 --- a/ingest/tests.py +++ b/ingest/tests.py @@ -471,7 +471,7 @@ class IngestViewTestCase(TransactionTestCase): data_bytes = BROTLI_BOMB_4G t0 = time.time() - self.client.post( + response = self.client.post( f"/api/{ project.id }/envelope/", content_type="application/json", headers={ @@ -486,6 +486,9 @@ class IngestViewTestCase(TransactionTestCase): # the failing version is well above 5s (I just stopped the process after ~30s) self.fail("Brotli bomb caused excessive processing time: %d seconds" % (time.time() - t0)) + self.assertTrue(b"Max length" in response.content, response.content) + self.assertTrue(b"exceeded" in response.content, response.content) + @tag("samples") def test_filestore(self): # quick & dirty way to test the filestore; in absence of a proper test for it, we just run a more-or-less diff --git a/ingest/urls.py b/ingest/urls.py index 9d42b03..d98eaf3 100644 --- a/ingest/urls.py +++ b/ingest/urls.py @@ -4,8 +4,8 @@ from .views import IngestEventAPIView, IngestEnvelopeAPIView, MinidumpAPIView urlpatterns = [ # project_pk has to be an int per Sentry Client expectations. - path("/store/", IngestEventAPIView.as_view()), - path("/envelope/", IngestEnvelopeAPIView.as_view()), + path("/store/", IngestEventAPIView.as_view(), name="ingest-store"), + path("/envelope/", IngestEnvelopeAPIView.as_view(), name="ingest-envelope"), # is this "ingest"? it is at least in the sense that it matches the API schema and downstream auth etc. path("/minidump/", MinidumpAPIView.as_view()),