From 7bfad363760da5619b4372a6ac23eb05030ca5f2 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Thu, 25 Apr 2024 15:56:11 +0200 Subject: [PATCH] Implement brotli decompression --- bugsink/streams.py | 26 ++++++++++++++++++------ bugsink/tests.py | 27 ++++++++++++++++++++----- ingest/management/commands/send_json.py | 11 +++++++++- requirements.txt | 1 + 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/bugsink/streams.py b/bugsink/streams.py index 264d573..6472a44 100644 --- a/bugsink/streams.py +++ b/bugsink/streams.py @@ -1,5 +1,6 @@ import zlib import io +import brotli from bugsink.app_settings import get_settings @@ -35,10 +36,23 @@ def zlib_generator(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE): yield z.flush() -class ZLibReader: +def brotli_generator(input_stream, chunk_size=DEFAULT_CHUNK_SIZE): + decompressor = brotli.Decompressor() - def __init__(self, input_stream, wbits): - self.generator = zlib_generator(input_stream, wbits) + while True: + compressed_chunk = input_stream.read(chunk_size) + if not compressed_chunk: + break + + yield decompressor.process(compressed_chunk) + + assert decompressor.is_finished() + + +class GeneratorReader: + + def __init__(self, generator): + self.generator = generator self.unread = b"" def read(self, size=None): @@ -67,13 +81,13 @@ def content_encoding_reader(request): encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower() if encoding == "gzip": - return ZLibReader(request, WBITS_PARAM_FOR_GZIP) + return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_GZIP)) if encoding == "deflate": - return ZLibReader(request, WBITS_PARAM_FOR_DEFLATE) + return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_DEFLATE)) if encoding == "br": - raise NotImplementedError("Brotli not supported (yet)") + return GeneratorReader(brotli_generator(request)) return request diff --git a/bugsink/tests.py b/bugsink/tests.py index adecb40..4d64ed9 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -1,5 +1,6 @@ import io from datetime import datetime, timezone +import brotli from unittest import TestCase as RegularTestCase from django.test import TestCase as DjangoTestCase @@ -14,8 +15,8 @@ from .period_counter import PeriodCounter, _prev_tup, TL_DAY, TL_MONTH, TL_YEAR from .volume_based_condition import VolumeBasedCondition from .registry import PeriodCounterRegistry from .streams import ( - compress_with_zlib, ZLibReader, WBITS_PARAM_FOR_GZIP, WBITS_PARAM_FOR_DEFLATE, MaxDataReader, - MaxDataWriter) + compress_with_zlib, GeneratorReader, WBITS_PARAM_FOR_GZIP, WBITS_PARAM_FOR_DEFLATE, MaxDataReader, + MaxDataWriter, zlib_generator, brotli_generator) def apply_n(f, n, v): @@ -211,7 +212,7 @@ class StreamsTestCase(RegularTestCase): compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_GZIP)) result = b"" - reader = ZLibReader(compressed_stream, WBITS_PARAM_FOR_GZIP) + reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_GZIP)) while True: chunk = reader.read(3) @@ -228,7 +229,23 @@ class StreamsTestCase(RegularTestCase): compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_DEFLATE)) result = b"" - reader = ZLibReader(compressed_stream, WBITS_PARAM_FOR_DEFLATE) + reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_DEFLATE)) + + while True: + chunk = reader.read(3) + result += chunk + if chunk == b"": + break + + self.assertEquals(myself_times_ten, result) + + def test_compress_decompress_brotli(self): + myself_times_ten = open(__file__, 'rb').read() * 10 + + compressed_stream = io.BytesIO(brotli.compress(myself_times_ten)) + + result = b"" + reader = GeneratorReader(brotli_generator(compressed_stream)) while True: chunk = reader.read(3) @@ -245,7 +262,7 @@ class StreamsTestCase(RegularTestCase): compressed_stream = io.BytesIO(compress_with_zlib(plain_stream, WBITS_PARAM_FOR_DEFLATE)) result = b"" - reader = ZLibReader(compressed_stream, WBITS_PARAM_FOR_DEFLATE) + reader = GeneratorReader(zlib_generator(compressed_stream, WBITS_PARAM_FOR_DEFLATE)) result = reader.read(None) self.assertEquals(myself_times_ten, result) diff --git a/ingest/management/commands/send_json.py b/ingest/management/commands/send_json.py index defbf20..a0aa176 100644 --- a/ingest/management/commands/send_json.py +++ b/ingest/management/commands/send_json.py @@ -1,5 +1,6 @@ import io import uuid +import brotli import time import json @@ -139,12 +140,20 @@ class Command(BaseCommand): headers["Content-Encoding"] = "gzip" wbits = WBITS_PARAM_FOR_GZIP - else: + elif compress == "deflate": headers["Content-Encoding"] = "deflate" wbits = WBITS_PARAM_FOR_DEFLATE compressed_data = compress_with_zlib(io.BytesIO(data_bytes), wbits) + response = requests.post( + get_envelope_url(dsn) if use_envelope else get_store_url(dsn), + headers=headers, + data=compressed_data, + ) + elif compress == "br": + headers["Content-Encoding"] = "br" + compressed_data = brotli.compress(data_bytes) response = requests.post( get_envelope_url(dsn) if use_envelope else get_store_url(dsn), headers=headers, diff --git a/requirements.txt b/requirements.txt index 1165c96..9dec10b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ semver==3.0.* django-admin-autocomplete-filter==0.7.* pygments==2.16.* inotify_simple +brotli # testing/development only: requests