From aab062a11e31a9c68ae8c6f3ca62d8a228de7093 Mon Sep 17 00:00:00 2001
From: Klaas van Schelven <klaas@vanschelven.com>
Date: Fri, 7 Nov 2025 22:52:11 +0100
Subject: [PATCH] Brotli streaming decompression: use output_buffer_limit

This became possible with brotli 1.2.0 (hence the requirements.txt bump).

It is my understanding that, before this change, there was basically
no enforced limit on the number of bytes "coming out" of
decompressor.process(); in other words: chunk size did not
apply to the most relevant (potentially blowing up) part of
the equation.

We had a MaxDataReader in place, but that would come "too late",
since all the memory-consuming work would happen right in
brotli_generator, before any limiting could take place.

See https://github.com/google/brotli/issues/1381
---
 bugsink/streams.py | 21 +++++++++++++--------
 bugsink/tests.py   | 17 +++++++++++++++++
 requirements.txt   |  2 +-
 3 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/bugsink/streams.py b/bugsink/streams.py
index 295fa56..864927e 100644
--- a/bugsink/streams.py
+++ b/bugsink/streams.py
@@ -3,7 +3,6 @@ import io
 import brotli
 
 from bugsink.app_settings import get_settings
-from bugsink.utils import assert_
 
 
 DEFAULT_CHUNK_SIZE = 8 * 1024
@@ -39,15 +38,21 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE):
 
 def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE):
     decompressor = brotli.Decompressor()
+    input_is_finished = False
 
-    while True:
-        compressed_chunk = input_stream.read(chunk_size)
-        if not compressed_chunk:
-            break
+    while not (decompressor.is_finished() and input_is_finished):
+        if decompressor.can_accept_more_data():
+            compressed_chunk = input_stream.read(chunk_size)
+            if not compressed_chunk:
+                input_is_finished = True
+                data = decompressor.process(b"", output_buffer_limit=chunk_size)  # b"": no input available, "drain"
+            else:
+                data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size)
+        else:
+            data = decompressor.process(b"", output_buffer_limit=chunk_size)  # b"": decompressor can't take more input
 
-        yield decompressor.process(compressed_chunk)
-
-    assert_(decompressor.is_finished())
+        if data:
+            yield data
 
 
 class GeneratorReader:
diff --git a/bugsink/tests.py b/bugsink/tests.py
index 27f3781..c2b9fef 100644
--- a/bugsink/tests.py
+++ b/bugsink/tests.py
@@ -93,6 +93,23 @@ class StreamsTestCase(RegularTestCase):
 
         self.assertEqual(myself_times_ten, result)
 
+    def test_decompress_brotli_tiny_bomb(self):
+        # by picking something "sufficiently large" we can ensure all three code paths in brotli_generator are taken,
+        # in particular the "cannot accept more input" path. (for it to be taken, we need a "big thing" on the output
+        # side)
+        compressed_stream = io.BytesIO(brotli.compress(b"\x00" * 15_000_000))
+
+        result = b""
+        reader = GeneratorReader(brotli_generator(compressed_stream))
+
+        while True:
+            chunk = reader.read(3)
+            result += chunk
+            if chunk == b"":
+                break
+
+        self.assertEqual(b"\x00" * 15_000_000, result)
+
     def test_compress_decompress_read_none(self):
         with open(__file__, 'rb') as f:
             myself_times_ten = f.read() * 10
diff --git a/requirements.txt b/requirements.txt
index 33292b3..8f0b9eb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,7 @@ semver==3.0.*
 django-admin-autocomplete-filter==0.7.*
 pygments==2.19.*
 inotify_simple==2.0.*
-Brotli==1.1.*
+Brotli==1.2.*
 python-dateutil==2.9.*
 whitenoise==6.11.*
 requests==2.32.*