diff --git a/bugsink/middleware.py b/bugsink/middleware.py
index c7a8c9d..0aa63fd 100644
--- a/bugsink/middleware.py
+++ b/bugsink/middleware.py
@@ -31,6 +31,7 @@ class ContentEncodingCheckMiddleware:
     SUPPORTED_VIEWS = [
         "ingest-store",
         "ingest-envelope",
+        "ingest-minidump",
     ]
 
     def __init__(self, get_response):
diff --git a/bugsink/streams.py b/bugsink/streams.py
index 2453f08..c542009 100644
--- a/bugsink/streams.py
+++ b/bugsink/streams.py
@@ -5,6 +5,7 @@ import io
 import brotli
 
 from bugsink.app_settings import get_settings
+from bugsink.utils import assert_
 
 DEFAULT_CHUNK_SIZE = 8 * 1024
 
@@ -119,21 +120,71 @@ class GeneratorReader:
         del self.buffer[:size]
         return result
 
+    def readline(self, size=-1):
+        # Drain self.buffer into a local accumulator first: self.read() serves
+        # from self.buffer, so extending read()'s return value back onto the
+        # buffer would re-read our own bytes (non-termination at EOF without a
+        # trailing newline) and, past one chunk, reorder them.
+        line = bytearray(self.buffer)
+        del self.buffer[:]
+
+        newline_index = line.find(b"\n")
+        while newline_index == -1:
+            chunk = self.read(DEFAULT_CHUNK_SIZE)
+            if not chunk:
+                break  # stream exhausted; return what we have (possibly b"")
+            search_from = len(line)
+            line += chunk
+            newline_index = line.find(b"\n", search_from)
+
+        if newline_index != -1:
+            end = newline_index + 1
+        else:
+            end = len(line)
+
+        if size >= 0:
+            end = min(end, size)
+
+        result = bytes(line[:end])
+        self.buffer[:0] = line[end:]  # keep the unconsumed tail for later reads
+        return result
+
 
 def content_encoding_reader(request):
     encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
 
     if encoding == "gzip":
-        return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_GZIP), bad_request_exceptions=(zlib.error,))
+        return GeneratorReader(
+            zlib_generator(request._stream, WBITS_PARAM_FOR_GZIP),
+            bad_request_exceptions=(zlib.error,),
+        )
 
     if encoding == "deflate":
-        return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_DEFLATE), bad_request_exceptions=(zlib.error,))
+        return GeneratorReader(
+            zlib_generator(request._stream, WBITS_PARAM_FOR_DEFLATE),
+            bad_request_exceptions=(zlib.error,)
+        )
 
     if encoding == "br":
-        return GeneratorReader(brotli_generator(request), bad_request_exceptions=(brotli.error, BrotliError))
+        return GeneratorReader(
+            brotli_generator(request._stream),
+            bad_request_exceptions=(brotli.error, BrotliError)
+        )
 
     return request
 
+def handle_request_content_encoding(request):
+    """Turns a request w/ Content-Encoding into an unpacked equivalent; for further "regular" (POST, FILES) handling
+    by Django.
+    """
+
+    encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
+    if encoding in ["gzip", "deflate", "br"]:
+        assert_(not request._read_started)
+        request._stream = content_encoding_reader(request)
+
+        request.META["CONTENT_LENGTH"] = str(pow(2, 32) - 1)  # large enough (we can't predict the decompressed value)
+        request.META.pop("HTTP_CONTENT_ENCODING")  # the resulting request is no longer encoded
+
 
 def compress_with_zlib(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE):
     # mostly useful for testing (compress-decompress cycles)
diff --git a/ingest/urls.py b/ingest/urls.py
index d98eaf3..202bc2c 100644
--- a/ingest/urls.py
+++ b/ingest/urls.py
@@ -8,5 +8,5 @@ urlpatterns = [
     path("/envelope/", IngestEnvelopeAPIView.as_view(), name="ingest-envelope"),
 
     # is this "ingest"? it is at least in the sense that it matches the API schema and downstream auth etc.
-    path("/minidump/", MinidumpAPIView.as_view()),
+    path("/minidump/", MinidumpAPIView.as_view(), name="ingest-minidump"),
 ]
diff --git a/ingest/views.py b/ingest/views.py
index 95478e7..d286db5 100644
--- a/ingest/views.py
+++ b/ingest/views.py
@@ -29,7 +29,9 @@ from issues.regressions import issue_is_regression
 
 from bugsink.transaction import immediate_atomic, delay_on_commit
 from bugsink.exceptions import ViolatedExpectation
-from bugsink.streams import content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded
+from bugsink.streams import (
+    content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded,
+    handle_request_content_encoding)
 from bugsink.app_settings import get_settings
 
 from events.models import Event
@@ -732,6 +734,10 @@ class MinidumpAPIView(BaseIngestAPIView):
 
     def post(self, request, project_pk=None):
         # not reusing the CORS stuff here; minidump-from-browser doesn't make sense.
+        # TODO: actually implement a max (we just use Django defaults now, which will switch to write-to-tmp after 2.5M
+        # for the file, but this can still swamp your CPU/tmp dir.)
+        handle_request_content_encoding(request)
+
         ingested_at = datetime.now(timezone.utc)
         project = self.get_project_for_request(project_pk, request)
 