From aab062a11e31a9c68ae8c6f3ca62d8a228de7093 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 7 Nov 2025 22:52:11 +0100 Subject: [PATCH] Brotli streaming decompression: use output_buffer_limit This became possible with brotli 1.2.0. It is my understanding that before this change there was basically no enforced limit on the amount of bytes "coming out" of decompressor.process(); in other words: chunk size did not apply to the most relevant (potentially blowing up) part of the equation. We had a MaxDataReader in place, but that would come "too late" since all the mem-consuming stuff would happen right in brotli_generator before any limiting would be possible. See https://github.com/google/brotli/issues/1381 --- bugsink/streams.py | 21 +++++++++++++-------- bugsink/tests.py | 17 +++++++++++++++++ requirements.txt | 2 +- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 295fa56..864927e 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -3,7 +3,6 @@ import io import brotli from bugsink.app_settings import get_settings -from bugsink.utils import assert_ DEFAULT_CHUNK_SIZE = 8 * 1024 @@ -39,15 +38,21 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE): def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): decompressor = brotli.Decompressor() + input_is_finished = False - while True: - compressed_chunk = input_stream.read(chunk_size) - if not compressed_chunk: - break + while not (decompressor.is_finished() and input_is_finished): + if decompressor.can_accept_more_data(): + compressed_chunk = input_stream.read(chunk_size) + if not compressed_chunk: + input_is_finished = True + data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"": no input available, "drain" + else: + data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size) + else: + data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"": decompressor cannot accept 
more input - yield decompressor.process(compressed_chunk) - - assert_(decompressor.is_finished()) + if data: + yield data class GeneratorReader: diff --git a/bugsink/tests.py b/bugsink/tests.py index 27f3781..c2b9fef 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -93,6 +93,23 @@ class StreamsTestCase(RegularTestCase): self.assertEqual(myself_times_ten, result) + def test_decompress_brotli_tiny_bomb(self): + # by picking something "sufficiently large" we can ensure all three code paths in brotli_generator are taken, + # in particular the "cannot accept more input" path. (for it to be taken, we need a "big thing" on the output + # side) + compressed_stream = io.BytesIO(brotli.compress(b"\x00" * 15_000_000)) + + result = b"" + reader = GeneratorReader(brotli_generator(compressed_stream)) + + while True: + chunk = reader.read(3) + result += chunk + if chunk == b"": + break + + self.assertEqual(b"\x00" * 15_000_000, result) + def test_compress_decompress_read_none(self): with open(__file__, 'rb') as f: myself_times_ten = f.read() * 10 diff --git a/requirements.txt b/requirements.txt index 33292b3..8f0b9eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ semver==3.0.* django-admin-autocomplete-filter==0.7.* pygments==2.19.* inotify_simple==2.0.* -Brotli==1.1.* +Brotli==1.2.* python-dateutil==2.9.* whitenoise==6.11.* requests==2.32.*