diff --git a/bugsink/app_settings.py b/bugsink/app_settings.py index b8ac7a3..79178e8 100644 --- a/bugsink/app_settings.py +++ b/bugsink/app_settings.py @@ -39,6 +39,7 @@ DEFAULTS = { # System inner workings: "DIGEST_IMMEDIATELY": True, "VALIDATE_ON_DIGEST": "none", # other legal values are "warn" and "strict" + "KEEP_ENVELOPES": 0, # set to a number to store that many; 0 means "store none". This is for debugging. # MAX* below mirror the (current) values for the Sentry Relay "MAX_EVENT_SIZE": _MEBIBYTE, diff --git a/bugsink/settings/development.py b/bugsink/settings/development.py index 6d28c96..5c00790 100644 --- a/bugsink/settings/development.py +++ b/bugsink/settings/development.py @@ -127,6 +127,8 @@ BUGSINK = { # will fit in the final version, so that's why it's not documented. "USE_ADMIN": True, "VALIDATE_ON_DIGEST": "strict", + + "KEEP_ENVELOPES": 10, } diff --git a/bugsink/urls.py b/bugsink/urls.py index 000d641..72b3706 100644 --- a/bugsink/urls.py +++ b/bugsink/urls.py @@ -9,6 +9,7 @@ from users.views import debug_email as debug_users_email from teams.views import debug_email as debug_teams_email from bugsink.app_settings import get_settings from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences +from ingest.views import download_envelope from .views import home, trigger_error, favicon, settings_view from .debug_views import csrf_debug @@ -39,6 +40,9 @@ urlpatterns = [ path('api/', include('ingest.urls')), + # not in /api/ because it's not part of the ingest API, but still part of the ingest app + path('ingest/envelope/<int:envelope_id>/', download_envelope, name='download_envelope'), + path('projects/', include('projects.urls')), path('teams/', include('teams.urls')), path('events/', include('events.urls')), diff --git a/ingest/admin.py b/ingest/admin.py index e69de29..e235ed4 100644 --- a/ingest/admin.py +++ b/ingest/admin.py @@ -0,0 +1,10 @@ +from django.contrib import admin + +from .models import 
Envelope + + +@admin.register(Envelope) +class EnvelopeAdmin(admin.ModelAdmin): + list_display = ("id", "project_pk", "ingested_at") + fields = ["project_pk", "ingested_at", "data"] + readonly_fields = ["project_pk", "ingested_at", "data"] diff --git a/ingest/migrations/0002_initial.py b/ingest/migrations/0002_initial.py new file mode 100644 index 0000000..4608480 --- /dev/null +++ b/ingest/migrations/0002_initial.py @@ -0,0 +1,37 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ("ingest", "0001_set_sqlite_wal"), + ] + + operations = [ + migrations.CreateModel( + name="Envelope", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("ingested_at", models.DateTimeField()), + ("project_pk", models.IntegerField()), + ("data", models.BinaryField()), + ], + options={ + "indexes": [ + models.Index( + fields=["ingested_at"], name="ingest_enve_ingeste_f13790_idx" + ) + ], + }, + ), + ] diff --git a/ingest/models.py b/ingest/models.py index e69de29..2389881 100644 --- a/ingest/models.py +++ b/ingest/models.py @@ -0,0 +1,83 @@ +import logging + +from django.db import models + +from bugsink.transaction import immediate_atomic +from bugsink.app_settings import get_settings + +logger = logging.getLogger("bugsink.ingest") + + +class StoreEnvelope: + def __init__(self, ingested_at, project_pk, request): + self._read = bytearray() # mutable buffer: appending a chunk does not re-copy everything read so far + + self._ingested_at = ingested_at + self._project_pk = project_pk + + self.request = request + + def read(self, size): + result = self.request.read(size) + if result: + self._read += result + return result + + def __getattr__(self, attr): + return getattr(self.request, attr) + + # `immediate_atomic` here, rather than in the calling spot, to avoid its usage on the DontStoreEnvelope case. 
+ # Also: all the transaction stuff is kinda overkill anyway, for something that's completely unconnected to our real + # data, i.e. can't really conflict... but in the sqlite world being explicit about where the transactions are is + # always a good thing, i.e. keeps them small. + @immediate_atomic() + def store(self): + # read the rest of the request; the regular .ingest() method breaks early by design + self._read += self.request.read() + + if Envelope.objects.count() >= get_settings().KEEP_ENVELOPES: # >= b/c about to add + # -1 because 0-indexed; we delete including the boundary, so we'll have space for the new one + boundary = Envelope.objects.order_by("-ingested_at")[get_settings().KEEP_ENVELOPES - 1] + Envelope.objects.filter(ingested_at__lte=boundary.ingested_at).delete() + + envelope = Envelope.objects.create( + ingested_at=self._ingested_at, + project_pk=self._project_pk, + data=bytes(self._read), # immutable snapshot of the accumulated buffer + ) + + # arguably "debug", but if you turned StoreEnvelope on, you probably want to use its results "soon", and I'd + # rather not have another thing for people to configure. + logger.info("envelope stored: %s", envelope.pk) + + +class DontStoreEnvelope: + """conform to the same interface as StoreEnvelope, but don't store anything""" + def __init__(self, request): + self.request = request + + def __getattr__(self, attr): + return getattr(self.request, attr) + + def store(self): + pass + + +class Envelope(models.Model): + # id is implied which makes it an Integer. Great for sorting + + ingested_at = models.DateTimeField(blank=False, null=False) + + # we just use PK to avoid passing Projects around for debug code, and avoid FK-constraints too. 
+ project_pk = models.IntegerField(blank=False) + + # binary, because we don't want to make any assumptions about what we get "over the wire" (whether it's even utf-8) + data = models.BinaryField(blank=False, null=False) + + class Meta: + indexes = [ + models.Index(fields=["ingested_at"]), + ] + + def get_absolute_url(self): + return f"/ingest/envelope/{self.pk}/" diff --git a/ingest/views.py b/ingest/views.py index 54b46be..46cbe96 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -15,6 +15,7 @@ from django.core.exceptions import ValidationError from django.http import HttpResponse, JsonResponse from django.views.decorators.csrf import csrf_exempt from django.utils.decorators import method_decorator +from django.contrib.auth.decorators import user_passes_test from compat.auth import parse_auth_header_value from compat.dsn import get_sentry_key @@ -39,6 +40,7 @@ from .parsers import StreamingEnvelopeParser, ParseError from .filestore import get_filename_for_event_id from .tasks import digest from .event_counter import check_for_thresholds +from .models import StoreEnvelope, DontStoreEnvelope, Envelope HTTP_429_TOO_MANY_REQUESTS = 429 @@ -458,12 +460,29 @@ class IngestEnvelopeAPIView(BaseIngestAPIView): def _post(self, request, project_pk=None): ingested_at = datetime.now(timezone.utc) + input_stream = MaxDataReader("MAX_ENVELOPE_SIZE", content_encoding_reader( + MaxDataReader("MAX_ENVELOPE_COMPRESSED_SIZE", request))) + + # note: we use the unvalidated (against DSN) "project_pk"; b/c of the debug-nature we assume "not a problem" + input_stream = StoreEnvelope(ingested_at, project_pk, input_stream) if get_settings().KEEP_ENVELOPES > 0 \ + else DontStoreEnvelope(input_stream) + + try: + return self._post2(request, input_stream, ingested_at, project_pk) + finally: + # storing stuff in the DB on-ingest (rather than on digest-only) is not "as architected"; it's OK because + # this is a debug-only thing. 
+ # + # note: in finally, so this happens for all paths, including errors and 404 (i.e. wrong DSN). By design + # b/c the error-paths are often the interesting ones when debugging. We even store when over quota (429); + # that's more of a trade-off to avoid adding extra complexity for a debug-tool. + input_stream.store() + + def _post2(self, request, input_stream, ingested_at, project_pk=None): # Note: wrapping the COMPRESSES_SIZE checks arount request makes it so that when clients do not compress their # requests, they are still subject to the (smaller) maximums that apply pre-uncompress. This is exactly what we # want. - parser = StreamingEnvelopeParser( - MaxDataReader("MAX_ENVELOPE_SIZE", content_encoding_reader( - MaxDataReader("MAX_ENVELOPE_COMPRESSED_SIZE", request)))) + parser = StreamingEnvelopeParser(input_stream) envelope_headers = parser.get_envelope_headers() @@ -558,3 +577,11 @@ class IngestEnvelopeAPIView(BaseIngestAPIView): # more stuff that we don't care about (up to 20MiB compressed) whereas the max event size (uncompressed) is 1MiB. # Another advantage: this allows us to raise the relevant Header parsing and size limitation Exceptions to the SDKs. # + + +@user_passes_test(lambda u: u.is_superuser) +def download_envelope(request, envelope_id=None): + envelope = get_object_or_404(Envelope, pk=envelope_id) + response = HttpResponse(envelope.data, content_type="application/x-sentry-envelope") + response["Content-Disposition"] = f'attachment; filename="envelope-{envelope_id}.json"' + return response