Brotli streaming decompression: use output_buffer_limit

This became possible with brotli 1.2.0

As I understand it, before this change there was effectively no
enforced limit on the number of bytes "coming out" of
decompressor.process(); in other words, the chunk size did not
apply to the most relevant (potentially exploding) part of
the equation.

We had a MaxDataReader in place, but it would kick in "too late",
since all the memory consumption would happen inside brotli_generator,
before any limiting was possible.

See https://github.com/google/brotli/issues/1381
This commit is contained in:
Klaas van Schelven
2025-11-07 22:52:11 +01:00
parent 2e882dc7c0
commit aab062a11e
3 changed files with 31 additions and 9 deletions

View File

@@ -3,7 +3,6 @@ import io
import brotli
from bugsink.app_settings import get_settings
from bugsink.utils import assert_
DEFAULT_CHUNK_SIZE = 8 * 1024
@@ -39,15 +38,21 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE):
def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE):
    """Yield the decompressed contents of the brotli-compressed `input_stream`, piece by piece.

    At most `chunk_size` compressed bytes are read per step, and every `process()` call is passed
    `output_buffer_limit=chunk_size` (available as of brotli 1.2.0), so that the output side is limited too
    rather than being inflated in memory in one go.

    Only non-empty pieces are yielded.

    Raises `brotli.error` when the input ends before the brotli stream is complete.
    """
    decompressor = brotli.Decompressor()
    input_is_finished = False

    while not (decompressor.is_finished() and input_is_finished):
        if decompressor.can_accept_more_data():
            compressed_chunk = input_stream.read(chunk_size)
            if not compressed_chunk:
                input_is_finished = True
                # b"": no input available, "drain"
                data = decompressor.process(b"", output_buffer_limit=chunk_size)
                if not data and not decompressor.is_finished():
                    # No input left and nothing left to drain, yet the stream is incomplete: no further progress is
                    # possible. Without this check the loop would spin forever on truncated input.
                    raise brotli.error("brotli stream ended before decompression was finished")
            else:
                data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size)
        else:
            # b"": the decompressor cannot accept more input (pending output must be drained first)
            data = decompressor.process(b"", output_buffer_limit=chunk_size)

        if data:
            yield data
class GeneratorReader:

View File

@@ -93,6 +93,23 @@ class StreamsTestCase(RegularTestCase):
self.assertEqual(myself_times_ten, result)
def test_decompress_brotli_tiny_bomb(self):
    # by picking something "sufficiently large" we can ensure all three code paths in brotli_generator are taken,
    # in particular the "cannot accept more input" path. (for it to be taken, we need a "big thing" on the output
    # side)
    expected = b"\x00" * 15_000_000
    compressed_stream = io.BytesIO(brotli.compress(expected))

    reader = GeneratorReader(brotli_generator(compressed_stream))

    # accumulate in a bytearray: `bytes += bytes` copies the whole result on every one of the millions of tiny
    # reads, which makes the loop quadratic in the output size.
    result = bytearray()
    while True:
        chunk = reader.read(3)
        if not chunk:
            break
        result += chunk

    self.assertEqual(expected, bytes(result))
def test_compress_decompress_read_none(self):
with open(__file__, 'rb') as f:
myself_times_ten = f.read() * 10

View File

@@ -7,7 +7,7 @@ semver==3.0.*
django-admin-autocomplete-filter==0.7.*
pygments==2.19.*
inotify_simple==2.0.*
Brotli==1.1.*
Brotli==1.2.*
python-dateutil==2.9.*
whitenoise==6.11.*
requests==2.32.*