minidump endpoint: support content encoding

Adds a readline() method to GeneratorReader (ChatGPT-generated; eyeballed for
correctness), because Django's FILES/POST handling expects one on the request stream.
This commit is contained in:
Klaas van Schelven
2025-11-11 13:50:07 +01:00
parent 72aab81d7d
commit 937df4cbb8
4 changed files with 57 additions and 6 deletions

View File

@@ -31,6 +31,7 @@ class ContentEncodingCheckMiddleware:
SUPPORTED_VIEWS = [
"ingest-store",
"ingest-envelope",
"ingest-minidump",
]
def __init__(self, get_response):

View File

@@ -5,6 +5,7 @@ import io
import brotli
from bugsink.app_settings import get_settings
from bugsink.utils import assert_
DEFAULT_CHUNK_SIZE = 8 * 1024
@@ -119,22 +120,65 @@ class GeneratorReader:
del self.buffer[:size]
return result
def readline(self, size=-1):
    """Read and return one line (up to and including the next b"\\n") from the stream.

    size: maximum number of bytes to return; negative (the default) or None means "no limit". With a limit, the
    result may be a partial line (no trailing newline), as with regular file objects.

    Returns b"" only when the stream is exhausted (or size == 0). A final line without a trailing newline is
    returned as-is.

    Relies on self.read(n) handing out the *next* bytes of the stream and removing them from self.buffer (leaving
    any surplus it pulled from the underlying generator in self.buffer).
    """
    limit = None if size is None or size < 0 else size

    newline_index = self.buffer.find(b"\n")

    if newline_index == -1 and (limit is None or len(self.buffer) < limit):
        # What's buffered is not enough. Move it into a local accumulator first: with self.buffer empty, every
        # self.read() below returns strictly later bytes, so appending them keeps the stream in order. (Appending
        # read() results to self.buffer directly would put them *behind* surplus that read() left there, and would
        # also keep read() returning non-empty data forever once the underlying generator is exhausted.)
        pending = bytearray(self.buffer)
        del self.buffer[:]

        while newline_index == -1 and (limit is None or len(pending) < limit):
            chunk = self.read(DEFAULT_CHUNK_SIZE)
            if not chunk:
                break  # stream exhausted; whatever is pending is the (unterminated) last line
            search_from = len(pending)  # earlier bytes are already known to contain no newline
            pending.extend(chunk)
            newline_index = pending.find(b"\n", search_from)

        # Put the accumulated bytes back *in front of* any surplus left behind by the last read(); indexes into
        # `pending` (such as newline_index) remain valid for self.buffer.
        self.buffer[:0] = pending

    if newline_index != -1:
        end = newline_index + 1
    else:
        end = len(self.buffer)

    if limit is not None:
        end = min(end, limit)

    result = bytes(self.buffer[:end])
    del self.buffer[:end]
    return result
def content_encoding_reader(request):
    """Return a file-like object from which the decoded (decompressed) body of `request` can be read.

    The encoding is taken from request.META["HTTP_CONTENT_ENCODING"] (compared case-insensitively):

    * "gzip" / "deflate": a GeneratorReader over zlib_generator with the matching wbits parameter;
    * "br": a GeneratorReader over brotli_generator;
    * anything else (including no header at all): `request` itself, unchanged.

    The decompressor-specific exception types are handed to the GeneratorReader as `bad_request_exceptions`.

    The decompressors pull their input from request._stream rather than from `request`: the returned reader may be
    installed *as* request._stream (handle_request_content_encoding does so), and a reader that fetched its input
    through `request` would then end up reading from itself.
    """
    encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()

    if encoding == "gzip":
        return GeneratorReader(
            zlib_generator(request._stream, WBITS_PARAM_FOR_GZIP),
            bad_request_exceptions=(zlib.error,),
        )

    if encoding == "deflate":
        return GeneratorReader(
            zlib_generator(request._stream, WBITS_PARAM_FOR_DEFLATE),
            bad_request_exceptions=(zlib.error,),
        )

    if encoding == "br":
        return GeneratorReader(
            brotli_generator(request._stream),
            bad_request_exceptions=(brotli.error, BrotliError),
        )

    # no (supported) Content-Encoding: the request body can be read as-is
    return request
def handle_request_content_encoding(request):
    """Unpack a request that carries a supported Content-Encoding, in place, so that Django's "regular" (POST, FILES)
    handling can subsequently be applied to it.

    For "gzip", "deflate" and "br" (matched case-insensitively) the request's _stream is replaced by a decoding
    reader, CONTENT_LENGTH is overwritten and the Content-Encoding header is removed from META. Requests with any
    other (or no) Content-Encoding are left untouched. Returns None.
    """
    content_encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
    if content_encoding not in ("gzip", "deflate", "br"):
        return

    # swapping out the stream only makes sense when nothing has been read from the request yet
    assert_(not request._read_started)
    request._stream = content_encoding_reader(request)

    # the decompressed size is not known up front; use a value that's large enough
    request.META["CONTENT_LENGTH"] = str(2 ** 32 - 1)

    # what's read from the request from here on is no longer encoded
    del request.META["HTTP_CONTENT_ENCODING"]
def compress_with_zlib(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE):
# mostly useful for testing (compress-decompress cycles)

View File

@@ -8,5 +8,5 @@ urlpatterns = [
path("<int:project_pk>/envelope/", IngestEnvelopeAPIView.as_view(), name="ingest-envelope"),
# is this "ingest"? it is at least in the sense that it matches the API schema and downstream auth etc.
path("<int:project_pk>/minidump/", MinidumpAPIView.as_view()),
path("<int:project_pk>/minidump/", MinidumpAPIView.as_view(), name="ingest-minidump"),
]

View File

@@ -29,7 +29,9 @@ from issues.regressions import issue_is_regression
from bugsink.transaction import immediate_atomic, delay_on_commit
from bugsink.exceptions import ViolatedExpectation
from bugsink.streams import content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded
from bugsink.streams import (
content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded,
handle_request_content_encoding)
from bugsink.app_settings import get_settings
from events.models import Event
@@ -732,6 +734,10 @@ class MinidumpAPIView(BaseIngestAPIView):
def post(self, request, project_pk=None):
# not reusing the CORS stuff here; minidump-from-browser doesn't make sense.
# TODO: actually implement a max size (we just use Django defaults now, which will switch to write-to-tmp after
# 2.5M for the file, but this can still swamp your CPU/tmp dir).
handle_request_content_encoding(request)
ingested_at = datetime.now(timezone.utc)
project = self.get_project_for_request(project_pk, request)