From 0bbb00c6ada7e17ec140a558732f38e11e2de89f Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Mon, 3 Nov 2025 21:04:28 +0100 Subject: [PATCH 01/16] smtp.EmailBackend in the docker conf: be explicit about it --- bugsink/conf_templates/docker.py.template | 1 + 1 file changed, 1 insertion(+) diff --git a/bugsink/conf_templates/docker.py.template b/bugsink/conf_templates/docker.py.template index f2b044b..c7959b6 100644 --- a/bugsink/conf_templates/docker.py.template +++ b/bugsink/conf_templates/docker.py.template @@ -106,6 +106,7 @@ else: if os.getenv("EMAIL_HOST"): + EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend" # default, here for explicitness EMAIL_HOST = os.getenv("EMAIL_HOST") EMAIL_HOST_USER = os.getenv("EMAIL_HOST_USER") EMAIL_HOST_PASSWORD = os.getenv("EMAIL_HOST_PASSWORD") From c38aace3aef62d2775d4c20639df34eed3df6658 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Mon, 3 Nov 2025 21:35:09 +0100 Subject: [PATCH 02/16] Add debug setting for email-sending Fix #86 --- bugsink/conf_templates/docker.py.template | 4 ++++ bugsink/conf_templates/singleserver.py.template | 4 ++++ bugsink/settings/default.py | 7 +++++++ bugsink/utils.py | 2 ++ 4 files changed, 17 insertions(+) diff --git a/bugsink/conf_templates/docker.py.template b/bugsink/conf_templates/docker.py.template index c7959b6..bceb20b 100644 --- a/bugsink/conf_templates/docker.py.template +++ b/bugsink/conf_templates/docker.py.template @@ -115,6 +115,10 @@ if os.getenv("EMAIL_HOST"): # True, we use that. 
EMAIL_USE_SSL = os.getenv("EMAIL_USE_SSL", "False").lower() in ("true", "1", "yes") EMAIL_USE_TLS = os.getenv("EMAIL_USE_TLS", str(not EMAIL_USE_SSL)).lower() in ("true", "1", "yes") + + if os.getenv("EMAIL_LOGGING", "false").lower() in ("true", "1", "yes"): + LOGGING['loggers']['bugsink.email']['level'] = "INFO" + else: # print("WARNING: EMAIL_HOST not set; email will not be sent") EMAIL_BACKEND = "bugsink.email_backends.QuietConsoleEmailBackend" diff --git a/bugsink/conf_templates/singleserver.py.template b/bugsink/conf_templates/singleserver.py.template index 854a482..06e51c4 100644 --- a/bugsink/conf_templates/singleserver.py.template +++ b/bugsink/conf_templates/singleserver.py.template @@ -71,6 +71,10 @@ EMAIL_BACKEND = "bugsink.email_backends.QuietConsoleEmailBackend" # instead of # EMAIL_USE_TLS = ... # EMAIL_USE_SSL = ... +# Uncomment the line below to show all sent emails in the logs +# LOGGING['loggers']['bugsink.email']['level'] = "INFO" + + SERVER_EMAIL = DEFAULT_FROM_EMAIL = "Bugsink " # constants for "create by" (user/team/project) settings diff --git a/bugsink/settings/default.py b/bugsink/settings/default.py index cc2b469..4591012 100644 --- a/bugsink/settings/default.py +++ b/bugsink/settings/default.py @@ -356,6 +356,13 @@ LOGGING['loggers']['bugsink.performance'] = { "propagate": False, } +# Email logging is hidden below WARNING by default, but this can be changed by setting the level to INFO. 
+LOGGING['loggers']['bugsink.email'] = { + "level": "WARNING", + "handlers": ["console"], + "propagate": False, +} + # Snappea Logging LOGGING["formatters"]["snappea"] = { "format": "{threadName} - {levelname:7} - {message}", diff --git a/bugsink/utils.py b/bugsink/utils.py index fbe4e57..6df2428 100644 --- a/bugsink/utils.py +++ b/bugsink/utils.py @@ -30,6 +30,8 @@ def send_rendered_email(subject, base_template_name, recipient_list, context=Non ) return + logger.info("Sending email with subject '%s' to %s", subject, recipient_list) + if context is None: context = {} From 1ea365e146f5ade1aa84171533056328a7547e75 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Tue, 4 Nov 2025 09:31:58 +0100 Subject: [PATCH 03/16] Typo in LICENSE --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 8203ade..08dc9b3 100644 --- a/LICENSE +++ b/LICENSE @@ -5,7 +5,7 @@ Portions of this software are licensed as follows: * All content that resides under the "ee/" directory of this repository, if that directory exists, is licensed under the license defined in "ee/LICENSE". -* All content that residues under the 'sentry' directory is Copyright 2019 Sentry +* All content that resides under the 'sentry' directory is Copyright 2019 Sentry (https://sentry.io) and individual contributors. 
(BSD 3-Clause License) * The icons in SVG format, directly included in various html templates, are from From 591b97042cc50f36e7bd7dd7da227eb37a812483 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 7 Nov 2025 19:59:23 +0100 Subject: [PATCH 04/16] docker-compose-sample.yaml: more clearly email:password See #261 --- docker-compose-sample.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-sample.yaml b/docker-compose-sample.yaml index ba4945a..62cb606 100644 --- a/docker-compose-sample.yaml +++ b/docker-compose-sample.yaml @@ -25,7 +25,7 @@ services: - "8000:8000" environment: SECRET_KEY: django-insecure-RMLYThim9NybWgXiUGat32Aa0Qbgqscf4NPDQuZO2glcZPOiXn # Change this (and remove django-insecure prefix), e.g. openssl rand -base64 50 - CREATE_SUPERUSER: admin:admin # Change this (or remove it and execute 'createsuperuser' against the running container) + CREATE_SUPERUSER: email:password # Change this (or remove it and execute 'createsuperuser' against the running container) PORT: 8000 DATABASE_URL: postgresql://bugsinkuser:your_super_secret_password@db:5432/bugsink # Change password to match POSTGRES_PASSWORD above BEHIND_HTTPS_PROXY: "false" # Change this for setups behind a proxy w/ ssl enabled From 3acd93856ddf6cd5eeec44ab6056d69c15932abb Mon Sep 17 00:00:00 2001 From: Erwin Oegema Date: Mon, 13 Oct 2025 14:01:17 +0000 Subject: [PATCH 05/16] Always migrate snappea database (cherry picked from commit 3c51122746db5b348ab95f289cef4efa9500d358) See #244 --- Dockerfile | 4 +--- Dockerfile.fromwheel | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9431503..9c9c4cb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,8 +54,6 @@ RUN groupadd --gid 14237 bugsink \ USER bugsink -RUN ["bugsink-manage", "migrate", "snappea", "--database=snappea"] - HEALTHCHECK CMD python -c 'import requests; requests.get("http://localhost:8000/health/ready").raise_for_status()' -CMD [ "monofy", 
"bugsink-show-version", "&&", "bugsink-manage", "check", "--deploy", "--fail-level", "WARNING", "&&", "bugsink-manage", "migrate", "&&", "bugsink-manage", "prestart", "&&", "gunicorn", "--config", "bugsink/gunicorn.docker.conf.py", "--bind=0.0.0.0:$PORT", "--access-logfile", "-", "bugsink.wsgi", "|||", "bugsink-runsnappea"] +CMD [ "monofy", "bugsink-show-version", "&&", "bugsink-manage", "check", "--deploy", "--fail-level", "WARNING", "&&", "bugsink-manage", "migrate", "snappea", "--database=snappea", "&&", "bugsink-manage", "migrate", "&&", "bugsink-manage", "prestart", "&&", "gunicorn", "--config", "bugsink/gunicorn.docker.conf.py", "--bind=0.0.0.0:$PORT", "--access-logfile", "-", "bugsink.wsgi", "|||", "bugsink-runsnappea"] diff --git a/Dockerfile.fromwheel b/Dockerfile.fromwheel index 94d42dc..6089dce 100644 --- a/Dockerfile.fromwheel +++ b/Dockerfile.fromwheel @@ -79,8 +79,6 @@ RUN groupadd --gid 14237 bugsink \ USER bugsink -RUN ["bugsink-manage", "migrate", "snappea", "--database=snappea"] - HEALTHCHECK CMD python -c 'import requests; requests.get("http://localhost:8000/health/ready").raise_for_status()' -CMD [ "monofy", "bugsink-show-version", "&&", "bugsink-manage", "check", "--deploy", "--fail-level", "WARNING", "&&", "bugsink-manage", "migrate", "&&", "bugsink-manage", "prestart", "&&", "gunicorn", "--config", "gunicorn.docker.conf.py", "--bind=0.0.0.0:$PORT", "--access-logfile", "-", "bugsink.wsgi", "|||", "bugsink-runsnappea"] +CMD [ "monofy", "bugsink-show-version", "&&", "bugsink-manage", "check", "--deploy", "--fail-level", "WARNING", "&&", "bugsink-manage", "migrate", "snappea", "--database=snappea", "&&", "bugsink-manage", "migrate", "&&", "bugsink-manage", "prestart", "&&", "gunicorn", "--config", "gunicorn.docker.conf.py", "--bind=0.0.0.0:$PORT", "--access-logfile", "-", "bugsink.wsgi", "|||", "bugsink-runsnappea"] From aab062a11e31a9c68ae8c6f3ca62d8a228de7093 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 7 Nov 2025 22:52:11 +0100 
Subject: [PATCH 06/16] Brotli streaming decompression: use output_buffer_limit This became possible with brotli 1.2.0 It is my understanding pre-this change there was basically no enforced limit on the amount of bytes "coming out" of decompressor.process(); in other words: chunk size did not apply to the most relevant (potentially blowing up) part of the equation. We had a MaxDataReader in place, but that would come "too late" since all the mem-consuming stuff would happen right in brotli_generator before any limiting would be possible. See https://github.com/google/brotli/issues/1381 --- bugsink/streams.py | 21 +++++++++++++-------- bugsink/tests.py | 17 +++++++++++++++++ requirements.txt | 2 +- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 295fa56..864927e 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -3,7 +3,6 @@ import io import brotli from bugsink.app_settings import get_settings -from bugsink.utils import assert_ DEFAULT_CHUNK_SIZE = 8 * 1024 @@ -39,15 +38,21 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE): def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): decompressor = brotli.Decompressor() + input_is_finished = False - while True: - compressed_chunk = input_stream.read(chunk_size) - if not compressed_chunk: - break + while not (decompressor.is_finished() and input_is_finished): + if decompressor.can_accept_more_data(): + compressed_chunk = input_stream.read(chunk_size) + if not compressed_chunk: + input_is_finished = True + data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"": no input available, "drain" + else: + data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size) + else: + data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"" compressor cannot accept more input - yield decompressor.process(compressed_chunk) - - assert_(decompressor.is_finished()) + if data: + yield data class 
GeneratorReader: diff --git a/bugsink/tests.py b/bugsink/tests.py index 27f3781..c2b9fef 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -93,6 +93,23 @@ class StreamsTestCase(RegularTestCase): self.assertEqual(myself_times_ten, result) + def test_decompress_brotli_tiny_bomb(self): + # by picking something "sufficiently large" we can ensure all three code paths in brotli_generator are taken, + # in particular the "cannot accept more input" path. (for it to be taken, we need a "big thing" on the output + # side) + compressed_stream = io.BytesIO(brotli.compress(b"\x00" * 15_000_000)) + + result = b"" + reader = GeneratorReader(brotli_generator(compressed_stream)) + + while True: + chunk = reader.read(3) + result += chunk + if chunk == b"": + break + + self.assertEqual(b"\x00" * 15_000_000, result) + def test_compress_decompress_read_none(self): with open(__file__, 'rb') as f: myself_times_ten = f.read() * 10 diff --git a/requirements.txt b/requirements.txt index 33292b3..8f0b9eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ semver==3.0.* django-admin-autocomplete-filter==0.7.* pygments==2.19.* inotify_simple==2.0.* -Brotli==1.1.* +Brotli==1.2.* python-dateutil==2.9.* whitenoise==6.11.* requests==2.32.* From 26f327a2578472fb7c2641161c9ae7084b3a200e Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 7 Nov 2025 23:48:06 +0100 Subject: [PATCH 07/16] GeneratorReader: don't copy so much this was exposed when dealing with things that yield in very big chunks potentially (e.g. brotli bombs) tests are more directly on the GeneratorReader itself now rather than integrating this with particular generators-under-test. 
--- bugsink/streams.py | 19 +++++----- bugsink/tests.py | 93 ++++++++++++++++++++-------------------------- 2 files changed, 51 insertions(+), 61 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 864927e..8c8ecb8 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -56,30 +56,31 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): class GeneratorReader: + """Read from a generator as from a file-like object.""" def __init__(self, generator): self.generator = generator - self.unread = b"" + self.buffer = bytearray() def read(self, size=None): if size is None: for chunk in self.generator: - self.unread += chunk - - result = self.unread - self.unread = b"" + self.buffer.extend(chunk) + result = bytes(self.buffer) + self.buffer.clear() return result - while size > len(self.unread): + while len(self.buffer) < size: try: chunk = next(self.generator) - if chunk == b"": + if not chunk: break - self.unread += chunk + self.buffer.extend(chunk) except StopIteration: break - self.unread, result = self.unread[size:], self.unread[:size] + result = bytes(self.buffer[:size]) + del self.buffer[:size] return result diff --git a/bugsink/tests.py b/bugsink/tests.py index c2b9fef..621707b 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -43,55 +43,31 @@ class StreamsTestCase(RegularTestCase): def test_compress_decompress_gzip(self): with open(__file__, 'rb') as f: myself_times_ten = f.read() * 10 + plain_stream = io.BytesIO(myself_times_ten) - compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_GZIP)) - - result = b"" reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_GZIP)) - while True: - chunk = reader.read(3) - result += chunk - if chunk == b"": - break - - self.assertEqual(myself_times_ten, result) + self.assertEqual(myself_times_ten, reader.read()) def test_compress_decompress_deflate(self): with open(__file__, 'rb') as f: myself_times_ten = f.read() * 10 + plain_stream = 
io.BytesIO(myself_times_ten) - compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_DEFLATE)) - - result = b"" reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_DEFLATE)) - while True: - chunk = reader.read(3) - result += chunk - if chunk == b"": - break - - self.assertEqual(myself_times_ten, result) + self.assertEqual(myself_times_ten, reader.read()) def test_compress_decompress_brotli(self): with open(__file__, 'rb') as f: myself_times_ten = f.read() * 10 compressed_stream = io.BytesIO(brotli.compress(myself_times_ten)) - - result = b"" reader = GeneratorReader(brotli_generator(compressed_stream)) - while True: - chunk = reader.read(3) - result += chunk - if chunk == b"": - break - - self.assertEqual(myself_times_ten, result) + self.assertEqual(myself_times_ten, reader.read()) def test_decompress_brotli_tiny_bomb(self): # by picking something "sufficiently large" we can ensure all three code paths in brotli_generator are taken, @@ -99,29 +75,11 @@ class StreamsTestCase(RegularTestCase): # side) compressed_stream = io.BytesIO(brotli.compress(b"\x00" * 15_000_000)) - result = b"" - reader = GeneratorReader(brotli_generator(compressed_stream)) - - while True: - chunk = reader.read(3) - result += chunk - if chunk == b"": - break - - self.assertEqual(b"\x00" * 15_000_000, result) - - def test_compress_decompress_read_none(self): - with open(__file__, 'rb') as f: - myself_times_ten = f.read() * 10 - plain_stream = io.BytesIO(myself_times_ten) - - compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_DEFLATE)) - - result = b"" - reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_DEFLATE)) - - result = reader.read(None) - self.assertEqual(myself_times_ten, result) + size = 0 + generator = brotli_generator(compressed_stream) + for chunk in generator: + size += len(chunk) + self.assertEqual(15_000_000, size) def test_max_data_reader(self): stream = io.BytesIO(b"hello" * 
100) @@ -160,6 +118,37 @@ class StreamsTestCase(RegularTestCase): with self.assertRaises(ValueError): writer.write(b"hellohello") + def test_generator_reader(self): + + def generator(): + yield b"hello " + yield b"I am " + yield b"a generator" + + reader = GeneratorReader(generator()) + + self.assertEqual(b"hel", reader.read(3)) + self.assertEqual(b"lo ", reader.read(3)) + self.assertEqual(b"I a", reader.read(3)) + self.assertEqual(b"m a", reader.read(3)) + self.assertEqual(b" generator", reader.read(None)) + + def test_generator_reader_performance(self): + # at least one test directly for GeneratorReader; doubles as a regression test for performance issue that showed + # up when the underlying generator yielded relatively big chunks and the read() size was small. should run + # easily under a second. + + def yielding_big_chunks(): + yield b"x" * 500_000 + + read = [] + reader = GeneratorReader(yielding_big_chunks()) + while True: + chunk = reader.read(1) + if chunk == b"": + break + read.append(chunk) + @override_settings(DEBUG_CSRF=True) class CSRFViewsTestCase(DjangoTestCase): From b44e5cd28f21c158d147d108c6bb04e7f7ec8572 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 00:59:10 +0100 Subject: [PATCH 08/16] Comments/docstrings --- bugsink/streams.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 8c8ecb8..25e45cd 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -37,6 +37,10 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE): def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): + # implementation notes: in principle chunk_size for input and output could be different, we keep them the same here. + # I've also seen that the actual output data may be quite a bit larger than the output_buffer_limit; a detail that + # I do not fully understand (but I understand that at least it's not _unboundedly_ larger). 
+ decompressor = brotli.Decompressor() input_is_finished = False @@ -56,7 +60,7 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): class GeneratorReader: - """Read from a generator as from a file-like object.""" + """Read from a generator (yielding bytes) as from a file-like object.""" def __init__(self, generator): self.generator = generator From 6a37cefc09589d50f7087444a9bfc149bac5b0ca Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 11:49:04 +0100 Subject: [PATCH 09/16] 2.0.5 CHANGELOG --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 983fdc6..ca4652f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changes +## 2.0.5 (8 November 2025) + +### Security + +Add a mitigation for certain DOS attacks using adversarial brotli payloads, see #266 + +### Backwards incompatible changes + +Fail to start when using non-sqlite for snappea, See #252 + +Since this was always recommended against, and probably broken anyway, this is not +expected to be backwards incompatible _in practice_, but it is at least in principle. 
+ + +### Other changes + +* Markdown stacktrace: render with all frames, See 9cb89ecf46a7 +* Add database vendor, version and machine arch to phonehome message, see d8fef759cabc +* Fix redirect on single-click actions when hosting at subdomain, Fix #250 +* 'poor man's' DB lock: lock the right DB; See e55c0eb417e2, and #252 for context +* Add more warnings about using non-sqlite for snappea in the conf templates, See #252 +* `parse_timestamp`: _actually_ parse as UTC when timezone not provided, see 8ad7f9738085 +* Add debug setting for email-sending, Fix #86 +* docker-compose-sample.yaml: more clearly email:password, See #261 +* create snappea database on Docker start rather than image build, See #244 + ## 2.0.4 (9 October 2025) * `convert_mariadb_uuids` command to fix UUID column problems on MariaDB From 1aa8e9589212bf4c98a0f3af8cfba227f85320b5 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 20:37:51 +0100 Subject: [PATCH 10/16] Assign local variable for easier debugging --- ingest/parsers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ingest/parsers.py b/ingest/parsers.py index 3cd419d..9789750 100644 --- a/ingest/parsers.py +++ b/ingest/parsers.py @@ -184,7 +184,8 @@ class StreamingEnvelopeParser: should_be_empty = io.BytesIO() self.remainder, self.at_eof = readuntil( self.input_stream, self.remainder, NewlineFinder(), should_be_empty, self.chunk_size) - if should_be_empty.getvalue() != b"": + should_be_empty_value = should_be_empty.getvalue() + if should_be_empty_value != b"": raise ParseError("Item with explicit length not terminated by newline/EOF") yield item_headers, item_output_stream From 1201f754e39265d2aac58edf49dc380bac334388 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 21:21:39 +0100 Subject: [PATCH 11/16] brotli decompress: avoid non-termination Analysis by @Cycloctane -- thanks! 
--- bugsink/streams.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 25e45cd..8636ea7 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -23,6 +23,15 @@ class MaxLengthExceeded(ValueError): pass +class BrotliError(ValueError): + """similar to brotli.error, but separate from it, to clarify non-library failure""" + + +def brotli_assert(condition, message): + if not condition: + raise BrotliError(message) + + def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE): z = zlib.decompressobj(wbits=wbits) @@ -41,19 +50,32 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): # I've also seen that the actual output data may be quite a bit larger than the output_buffer_limit; a detail that # I do not fully understand (but I understand that at least it's not _unboundedly_ larger). + # Peppered with assertions b/c the brotli package is ill-documented. + decompressor = brotli.Decompressor() input_is_finished = False while not (decompressor.is_finished() and input_is_finished): if decompressor.can_accept_more_data(): compressed_chunk = input_stream.read(chunk_size) - if not compressed_chunk: + if compressed_chunk: + data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size) + # assertion on data here? I'm not sure yet whether we actually hard-expect it. OK, you were ready to + # accept input, and you got it. Does it mean you have output per se? In the limit (a single compressed + # byte) one would say that the answer is "no". 
+ + else: input_is_finished = True data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"": no input available, "drain" - else: - data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size) + brotli_assert( + len(data) or decompressor.is_finished(), + "Draining done -> decompressor finished; if not, something's off") + else: data = decompressor.process(b"", output_buffer_limit=chunk_size) # b"" compressor cannot accept more input + brotli_assert( + len(data) > 0, + "A brotli processor that cannot accept input _must_ be able to produce output or it would be stuck.") if data: yield data From c63e23f096616cc6b14d2a363f3b6b305a567500 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 21:52:39 +0100 Subject: [PATCH 12/16] Clarify why I believe the current solution will not be stuck forever --- bugsink/streams.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 8636ea7..d6d24f7 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -50,8 +50,9 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): # I've also seen that the actual output data may be quite a bit larger than the output_buffer_limit; a detail that # I do not fully understand (but I understand that at least it's not _unboundedly_ larger). - # Peppered with assertions b/c the brotli package is ill-documented. - + # The brotli_assertions in the below are designed to guarantee that progress towards termination is made. In short: + # when no progress is made on the input_stream, either progress must be made on the output_stream or we must be in + # finished state. 
decompressor = brotli.Decompressor() input_is_finished = False @@ -60,9 +61,8 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): compressed_chunk = input_stream.read(chunk_size) if compressed_chunk: data = decompressor.process(compressed_chunk, output_buffer_limit=chunk_size) - # assertion on data here? I'm not sure yet whether we actually hard-expect it. OK, you were ready to - # accept input, and you got it. Does it mean you have output per se? In the limit (a single compressed - # byte) one would say that the answer is "no". + # brotli_assert not needed: we made progress on the `input_stream` in any case (we cannot infinitely be + # in this branch because the input_stream is finite). else: input_is_finished = True From 473d4de6d2de72a3bdee2905f540ce1afb67ce16 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 8 Nov 2025 23:36:48 +0100 Subject: [PATCH 13/16] 2.0.6 CHANGELOG --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca4652f..2d4637c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changes +## 2.0.6 (8 November 2025) + +### Security + +Add a mitigation for another DOS attack using adversarial brotli payloads. +Similar to, but distinct from, the fix in 2.0.5. 
+ ## 2.0.5 (8 November 2025) ### Security From 53bea102d911013ae387f290264deefe8aa93cf1 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sun, 9 Nov 2025 20:50:06 +0100 Subject: [PATCH 14/16] Compression decoding errors: return 400 rather than 500 --- bugsink/streams.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index d6d24f7..ba06f10 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -1,3 +1,5 @@ +from django.core.exceptions import BadRequest + import zlib import io import brotli @@ -82,13 +84,21 @@ def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): class GeneratorReader: - """Read from a generator (yielding bytes) as from a file-like object.""" + """Read from a generator (yielding bytes) as from a file-like object. In practice: used by content_encoding_reader, + so it's grown to fit that use case (and we may later want to reflect that in the name).""" - def __init__(self, generator): + def __init__(self, generator, bad_request_exceptions=()): self.generator = generator + self.bad_request_exceptions = bad_request_exceptions self.buffer = bytearray() def read(self, size=None): + try: + return self._read(size) + except self.bad_request_exceptions as e: + raise BadRequest(str(e)) from e + + def _read(self, size=None): if size is None: for chunk in self.generator: self.buffer.extend(chunk) @@ -114,13 +124,13 @@ def content_encoding_reader(request): encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower() if encoding == "gzip": - return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_GZIP)) + return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_GZIP), bad_request_exceptions=(zlib.error,)) if encoding == "deflate": - return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_DEFLATE)) + return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_DEFLATE), bad_request_exceptions=(zlib.error,)) if encoding == "br": - return 
GeneratorReader(brotli_generator(request)) + return GeneratorReader(brotli_generator(request), bad_request_exceptions=(brotli.error, BrotliError)) return request From a6ead89ca8acd56036456bc43ce97601a66a2af7 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sun, 9 Nov 2025 20:58:39 +0100 Subject: [PATCH 15/16] Remove event.debug_info basically unused --- bsmain/management/commands/send_json.py | 5 +---- events/admin.py | 4 +--- .../migrations/0024_remove_event_debug_info.py | 17 +++++++++++++++++ events/models.py | 5 ----- ingest/tests.py | 4 ---- ingest/views.py | 5 ----- issues/tests.py | 4 +--- 7 files changed, 20 insertions(+), 24 deletions(-) create mode 100644 events/migrations/0024_remove_event_debug_info.py diff --git a/bsmain/management/commands/send_json.py b/bsmain/management/commands/send_json.py index 8460928..ad0e705 100644 --- a/bsmain/management/commands/send_json.py +++ b/bsmain/management/commands/send_json.py @@ -66,7 +66,7 @@ class Command(BaseCommand): dsn = os.environ["SENTRY_DSN"] else: raise CommandError( - "You must provide a DSN to send data to Sentry. Use --dsn or set SENTRY_DSN environment variable.") + "You must provide a DSN. Use --dsn or set SENTRY_DSN environment variable.") else: dsn = options['dsn'] @@ -134,9 +134,6 @@ class Command(BaseCommand): headers = { "Content-Type": "application/json", "X-Sentry-Auth": get_header_value(dsn), - # as it stands we always send identifier here, even if it's not a filename. 
Whether that's useful or - # annoying is an open question, but no reason to change it for now - "X-BugSink-DebugInfo": identifier, } if options["x_forwarded_for"]: diff --git a/events/admin.py b/events/admin.py index 73783c3..403dbfa 100644 --- a/events/admin.py +++ b/events/admin.py @@ -32,7 +32,7 @@ class EventAdmin(admin.ModelAdmin): ordering = ['-timestamp'] - search_fields = ['event_id', 'debug_info'] + search_fields = ['event_id'] list_display = [ 'timestamp', @@ -41,7 +41,6 @@ class EventAdmin(admin.ModelAdmin): 'level', 'sdk_name', 'sdk_version', - 'debug_info', 'on_site', ] @@ -73,7 +72,6 @@ class EventAdmin(admin.ModelAdmin): 'environment', 'sdk_name', 'sdk_version', - 'debug_info', 'pretty_data', ] diff --git a/events/migrations/0024_remove_event_debug_info.py b/events/migrations/0024_remove_event_debug_info.py new file mode 100644 index 0000000..3017642 --- /dev/null +++ b/events/migrations/0024_remove_event_debug_info.py @@ -0,0 +1,17 @@ +# Generated by Django 5.2 on 2025-11-09 19:56 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("events", "0023_event_remote_addr"), + ] + + operations = [ + migrations.RemoveField( + model_name="event", + name="debug_info", + ), + ] diff --git a/events/models.py b/events/models.py index 5fc039b..02364fd 100644 --- a/events/models.py +++ b/events/models.py @@ -117,9 +117,6 @@ class Event(models.Model): sdk_name = models.CharField(max_length=255, blank=True, null=False, default="") sdk_version = models.CharField(max_length=255, blank=True, null=False, default="") - # this is a temporary(?), bugsink-specific value; - debug_info = models.CharField(max_length=255, blank=True, null=False, default="") - # denormalized/cached fields: calculated_type = models.CharField(max_length=128, blank=True, null=False, default="") calculated_value = models.TextField(max_length=1024, blank=True, null=False, default="") @@ -239,8 +236,6 @@ class Event(models.Model): 
sdk_name=maybe_empty(parsed_data.get("", {}).get("name", ""))[:255], sdk_version=maybe_empty(parsed_data.get("", {}).get("version", ""))[:255], - debug_info=event_metadata["debug_info"][:255], - # just getting from the dict would be more precise, since we always add this info, but doing the .get() # allows for backwards compatability (digesting events for which the info was not added on-ingest) so # we'll take the defensive approach "for now" (until most everyone is on >= 1.7.4) diff --git a/ingest/tests.py b/ingest/tests.py index 3e0d634..8313a89 100644 --- a/ingest/tests.py +++ b/ingest/tests.py @@ -49,7 +49,6 @@ def _digest_params(event_data, project, request, now=None): "event_id": event_data["event_id"], "project_id": project.id, "ingested_at": format_timestamp(now), - "debug_info": "", }, "event_data": event_data, "digested_at": now, @@ -329,7 +328,6 @@ class IngestViewTestCase(TransactionTestCase): content_type="application/json", headers={ "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, }, data=data_bytes, ) @@ -380,7 +378,6 @@ class IngestViewTestCase(TransactionTestCase): content_type="application/json", headers={ "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, }, data=data_bytes, ) @@ -471,7 +468,6 @@ class IngestViewTestCase(TransactionTestCase): content_type="application/json", headers={ "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, }, data=data_bytes, ) diff --git a/ingest/views.py b/ingest/views.py index fdae6c5..d550ae2 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -170,10 +170,6 @@ class BaseIngestAPIView(View): @classmethod def get_event_meta(cls, event_id, ingested_at, request, project): - # Meta means: not part of the event data. Basically: information that is available at the time of ingestion, and - # that must be passed to digest() in a serializable form. - debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") - # .get(..) 
-- don't want to crash on this and it's non-trivial to find a source that tells me with certainty # that the REMOTE_ADDR is always in request.META (it probably is in practice) remote_addr = request.META.get("REMOTE_ADDR") @@ -182,7 +178,6 @@ class BaseIngestAPIView(View): "event_id": event_id, "project_id": project.id, "ingested_at": format_timestamp(ingested_at), - "debug_info": debug_info, "remote_addr": remote_addr, } diff --git a/issues/tests.py b/issues/tests.py index 4e01b04..1a1d743 100644 --- a/issues/tests.py +++ b/issues/tests.py @@ -521,7 +521,6 @@ class IntegrationTest(TransactionTestCase): content_type="application/json", headers={ "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, }, ) self.assertEqual( @@ -554,7 +553,7 @@ class IntegrationTest(TransactionTestCase): except Exception as e: # we want to know _which_ event failed, hence the raise-from-e here - raise AssertionError("Error rendering event %s" % event.debug_info) from e + raise AssertionError("Error rendering event") from e def test_render_stacktrace_md(self): user = User.objects.create_user(username='test', password='test') @@ -588,7 +587,6 @@ class IntegrationTest(TransactionTestCase): content_type="application/json", headers={ "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, }, ) self.assertEqual( From 0432451e8e8b1daeb35c276e28afdf1cac82d8bf Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sun, 9 Nov 2025 21:11:30 +0100 Subject: [PATCH 16/16] Fix inefficient bytes concatenation when KEEP_ENVELOPES != 0 --- ingest/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ingest/models.py b/ingest/models.py index 6d8ed80..6056488 100644 --- a/ingest/models.py +++ b/ingest/models.py @@ -10,7 +10,7 @@ logger = logging.getLogger("bugsink.ingest") class StoreEnvelope: def __init__(self, ingested_at, project_pk, request): - self._read = b"" + self._read = bytearray() self._ingested_at = ingested_at self._project_pk = 
project_pk @@ -20,7 +20,7 @@ class StoreEnvelope: def read(self, size): result = self.request.read(size) if result: - self._read += result + self._read.extend(result) return result def __getattr__(self, attr): @@ -33,7 +33,7 @@ class StoreEnvelope: @immediate_atomic() def store(self): # read the rest of the request; the regular .ingest() method breaks early by design - self._read += self.request.read() + self._read.extend(self.request.read()) if Envelope.objects.count() >= get_settings().KEEP_ENVELOPES: # >= b/c about to add # -1 because 0-indexed; we delete including the boundary, so we'll have space for the new one @@ -43,7 +43,7 @@ class StoreEnvelope: envelope = Envelope.objects.create( ingested_at=self._ingested_at, project_pk=self._project_pk, - data=self._read, + data=bytes(self._read), ) # arguably "debug", but if you turned StoreEnvelope on, you probably want to use its results "soon", and I'd